[ { "description": "ニコニコ実況 過去ログアーカイブ ニコニコ実況 過去ログアーカイブは、ニコニコ実況 のサービス開始から現在までのすべての過去ログコメントを収集したデータセットです。 ", "url": "https://huggingface.co/datasets/KakologArchives/KakologArchives", "project_name": "KakologArchives", "downloads": 5242297, "source": "Hugging Face", "score": 37.163620376236246, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Fine-tuned XLSR-53 large model for speech recognition in Japanese Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the train and validation splits of Common Voice 6.1, CSS10 and JSUT.", "url": "https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-japanese", "project_name": "wav2vec2-large-xlsr-53-japanese", "downloads": 2780560, "source": "Hugging Face", "score": 19.68743998640711, "first_commit": "2021-04-16 00:20:03", "latest_commit": "2022-12-14 01:58:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ "Representation Learning", "Text Generation", "Language Models", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "Official Implementation of OCR-free Document Understanding Transformer (Donut) and Synthetic Document Generator (SynthDoG), ECCV 2022", "url": "https://github.com/clovaai/donut", "project_name": "donut", "stargazers_count": 6133, "source": "GitHub", "score": 17.05209097762495, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "Neologism dictionary based on the language resources on the Web for mecab-ipadic", "url": "https://github.com/neologd/mecab-ipadic-neologd", "project_name": "mecab-ipadic-neologd", "stargazers_count": 2742, "source": "GitHub", "score": 7.448128849620529, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "無料で使える中品質なテキスト読み上げソフトウェア、VOICEVOXのエディター", "url": "https://github.com/VOICEVOX/voicevox", "project_name": "voicevox", "stargazers_count": 2674, "source": "GitHub", "score": 7.25553981255055, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "a Japanese Input Method Editor designed for multi-platform", "url": "https://github.com/google/mozc", "project_name": "mozc", "stargazers_count": 2549, "source": "GitHub", "score": 6.9015158473483815, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "About Optical character recognition for Japanese text, with the main focus being Japanese manga", "url": "https://github.com/kha-white/manga-ocr", "project_name": "manga-ocr", "stargazers_count": 2016, "source": "GitHub", "score": 5.391957659726336, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "Engineer Vocabulary List in Japanese/English", "url": "https://github.com/mercari/engineer-vocabulary-list", "project_name": 
"engineer-vocabulary-list", "stargazers_count": 1805, "source": "GitHub", "score": 4.794365206465076, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality" ] }, { "description": "xlm-roberta-ner-japanese (Japanese caption : 日本語の固有表現抽出のモデル)", "url": "https://huggingface.co/tsmatz/xlm-roberta-ner-japanese", "project_name": "xlm-roberta-ner-japanese", "downloads": 631844, "source": "Hugging Face", "score": 4.433435020599588, "first_commit": "2022-10-24 02:08:37", "latest_commit": "2024-07-12 00:01:56", "languages": [], "model_or_dataset": "model", "model_size": 0.277, "model_architectures": "RobertaForTokenClassification", "multi_labels": [ "Multilinguality", "Information Extraction & Text Mining", "Cross-Lingual Transfer", "Named Entity Recognition", "Language Models", "Semantic Text Processing" ] }, { "description": "Whisper based Japanese subtitle generator", "url": "https://github.com/Ayanaminn/N46Whisper", "project_name": "N46Whisper", "stargazers_count": 1651, "source": "GitHub", "score": 4.358207681336005, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Text Generation", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Ruri: Japanese General Text Embeddings Usage Direct Usage (Sentence Transformers)", "url": "https://huggingface.co/cl-nagoya/ruri-base", "project_name": "ruri-base", "downloads": 531071, "source": "Hugging Face", "score": 3.7180348269866013, "first_commit": "2024-08-28 13:09:10", "latest_commit": "2024-09-04 08:49:23", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertModel", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "What’s this?", "url": "https://huggingface.co/globis-university/deberta-v3-japanese-large", "project_name": "deberta-v3-japanese-large", "downloads": 528819, "source": "Hugging Face", "score": 3.702047595922562, "first_commit": "2023-09-21 16:15:15", "latest_commit": "2024-07-05 05:50:06", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing", "Morphology" ] }, { "description": "無料で使える中品質なテキスト読み上げソフトウェア、VOICEVOXの音声合成エンジン", "url": "https://github.com/VOICEVOX/voicevox_engine", "project_name": "voicevox_engine", "stargazers_count": 1415, "source": "GitHub", "score": 3.6898104350343113, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality" ] }, { "description": "おじさんがLINEやメールで送ってきそうな文を生成する", "url": "https://github.com/greymd/ojichat", "project_name": "ojichat", "stargazers_count": 1257, "source": "GitHub", "score": 3.2423241430187706, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Read Japanese manga inside browser with selectable text.", "url": "https://github.com/kha-white/mokuro", "project_name": "mokuro", "stargazers_count": 1148, "source": "GitHub", "score": 2.9336152453624798, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Natural Language Interfaces", "Visual Data in NLP", "Dialogue Systems & 
Conversational Agents", "Multimodality" ] }, { "description": "オープンソースの日本語LLMまとめ", "url": "https://github.com/llm-jp/awesome-japanese-llm", "project_name": "awesome-japanese-llm", "stargazers_count": 1126, "source": "GitHub", "score": 2.8713070274868984, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "Japanese pop-up dictionary extension for Chrome and Firefox.", "url": "https://github.com/FooSoft/yomichan", "project_name": "yomichan", "stargazers_count": 1083, "source": "GitHub", "score": 2.7495227834573526, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Yet another Japanese morphological analyzer", "url": "https://github.com/taku910/mecab", "project_name": "mecab", "stargazers_count": 985, "source": "GitHub", "score": 2.4719679947388524, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "Kuromoji is a self-contained and very easy to use Japanese morphological analyzer designed for search", "url": "https://github.com/atilika/kuromoji", "project_name": "kuromoji", "stargazers_count": 974, "source": "GitHub", "score": 2.4408138858010617, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Information Retrieval", "Tagging", "Morphology" ] }, { "description": "無料で使える中品質なテキスト読み上げソフトウェア、VOICEVOXのコア", "url": "https://github.com/VOICEVOX/voicevox_core", "project_name": "voicevox_core", "stargazers_count": 932, "source": "GitHub", "score": 2.3218618334931334, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality" ] }, { "description": "オープンソースの住所正規化ライブラリ。", "url": "https://github.com/geolonia/normalize-japanese-addresses", "project_name": "normalize-japanese-addresses", "stargazers_count": 902, "source": "GitHub", "score": 2.2368960818446126, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "JavaScript implementation of Japanese morphological analyzer", "url": "https://github.com/takuyaa/kuromoji.js", "project_name": "kuromoji.js", "stargazers_count": 890, "source": "GitHub", "score": 2.2029097811852045, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "Japanese language library for converting Japanese sentence to Hiragana, Katakana or Romaji with furigana and okurigana modes supported.", "url": "https://github.com/hexenq/kuroshiro", "project_name": "kuroshiro", "stargazers_count": 880, "source": "GitHub", "score": 2.1745878639690313, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "Japanese morphological analysis engine written in pure Python", "url": "https://github.com/mocobeta/janome", "project_name": "janome", "stargazers_count": 871, "source": "GitHub", "score": 
2.149098138474475, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Tagging", "Morphology" ] }, { "description": "ChatdollKit enables you to make your 3D model into a chatbot", "url": "https://github.com/uezo/ChatdollKit", "project_name": "ChatdollKit", "stargazers_count": 867, "source": "GitHub", "score": 2.1377693715880057, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "Self-contained Japanese Morphological Analyzer written in pure Go", "url": "https://github.com/ikawaha/kagome", "project_name": "kagome", "stargazers_count": 856, "source": "GitHub", "score": 2.106615262650215, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "BERT base Japanese (unidic-lite with whole word masking, CC-100 and jawiki-20230102)", "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-v3", "project_name": "bert-base-japanese-v3", "downloads": 290461, "source": "Hugging Face", "score": 2.0099141936302396, "first_commit": "2023-05-19 00:13:53", "latest_commit": "2023-05-19 00:31:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForPreTraining", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Javascript library for detecting and transliterating Hiragana <--> Katakana <--> Romaji", "url": "https://github.com/WaniKani/WanaKana", "project_name": "WanaKana", "stargazers_count": 810, "source": "GitHub", "score": 1.976334443455817, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "A Japanese NLP Library using spaCy as framework based on Universal Dependencies", "url": "https://github.com/megagonlabs/ginza", "project_name": "ginza", "stargazers_count": 786, "source": "GitHub", "score": 1.9083618421370006, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Tagging" ] }, { "description": "Open data of nationwide Japanese address data at the town/block level (277,191 entries)", "url": "https://github.com/geolonia/japanese-addresses", "project_name": "japanese-addresses", "stargazers_count": 726, "source": "GitHub", "score": 1.7384303388399598, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "LUKE: Language Understanding with Knowledge-based Embeddings", "url": "https://github.com/studio-ousia/luke", "project_name": "luke", "stargazers_count": 722, "source": "GitHub", "score": 1.7271015719534906, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", "Knowledge Representation", "Semantic Text Processing" ] }, { "description": "Automatically exported from code.google.com/p/mozc-morse", "url": "https://github.com/google/mozc-devices", "project_name": "mozc-devices", "stargazers_count": 683, "source": "GitHub", "score": 1.616646094810414, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null,
"multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "rinna/japanese-cloob-vit-b-16", "url": "https://huggingface.co/rinna/japanese-cloob-vit-b-16", "project_name": "japanese-cloob-vit-b-16", "downloads": 229502, "source": "Hugging Face", "score": 1.577158590418526, "first_commit": "2022-04-27 08:29:29", "latest_commit": "2024-07-22 08:09:24", "languages": [], "model_or_dataset": "model", "model_size": 0.197, "model_architectures": "CLOOBModel", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "RetrievaEmbedding-01: AMBER The AMBER (Adaptive Multitask Bilingual Embedding Representations) is a text embedding model trained by Retrieva, Inc.", "url": "https://huggingface.co/retrieva-jp/amber-large", "project_name": "amber-large", "downloads": 217627, "source": "Hugging Face", "score": 1.4928564727962044, "first_commit": "2025-03-07 01:10:29", "latest_commit": "2025-03-31 09:12:41", "languages": [], "model_or_dataset": "model", "model_size": 0.315, "model_architectures": "ModernBertModel", "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "Code for producing Japanese pretrained models provided by rinna Co., Ltd.", "url": "https://github.com/rinnakk/japanese-pretrained-models", "project_name": "japanese-pretrained-models", "stargazers_count": 582, "source": "GitHub", "score": 1.3305947309270618, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Language Models", "Programming Languages in NLP", "Semantic Text Processing" ] }, { "description": "Yomitoku is an AI-powered document image analysis package designed specifically for the Japanese language.", "url": "https://github.com/kotaro-kinoshita/yomitoku", "project_name": "yomitoku", "stargazers_count": 568, "source": "GitHub", "score": 1.290944046824419, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "mecab-python. mecab-python. 
You can find the original version here: http://taku910.github.io/mecab/", "url": "https://github.com/SamuraiT/mecab-python3", "project_name": "mecab-python3", "stargazers_count": 553, "source": "GitHub", "score": 1.248461171000159, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "BERT models for Japanese text.", "url": "https://github.com/cl-tohoku/bert-japanese", "project_name": "bert-japanese", "stargazers_count": 532, "source": "GitHub", "score": 1.1889851448461946, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "The NDLOCR application", "url": "https://github.com/ndl-lab/ndlocr_cli", "project_name": "ndlocr_cli", "stargazers_count": 527, "source": "GitHub", "score": 1.174824186238108, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Raw data for Japanese Anime", "url": "https://github.com/bangumi-data/bangumi-data", "project_name": "bangumi-data", "stargazers_count": 525, "source": "GitHub", "score": 1.169159802794873, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "BERT with SentencePiece for Japanese text.", "url": "https://github.com/yoheikikuta/bert-japanese", "project_name": "bert-japanese", "stargazers_count": 497, "source": "GitHub", "score": 1.0898584345895874, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese and Chinese dictionaries for Yomitan.", "url": "https://github.com/marvnc/yomitan-dictionaries", "project_name": "yomitan-dictionaries", "stargazers_count": 480, "source": "GitHub", "score": 1.0417111753220927, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "BERT base Japanese (IPA dictionary)", "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese", "project_name": "bert-base-japanese", "downloads": 153657, "source": "Hugging Face", "score": 1.0387254021183172, "first_commit": "2020-04-28 21:34:23", "latest_commit": "2024-02-22 00:57:00", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "morphological analyzer (word segmentor + PoS Tagger) for Chinese and Japanese written purely in JavaScript.", "url": "https://github.com/rakuten-nlp/rakutenma", "project_name": "rakutenma", "stargazers_count": 471, "source": "GitHub", "score": 1.0162214498275366, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "A Cython MeCab wrapper for fast, pythonic Japanese tokenization and morphological analysis.", "url": "https://github.com/polm/fugashi", "project_name": "fugashi", "stargazers_count": 439, "source": "GitHub", "score":
0.9255913147357814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "BERT base Japanese (character-level tokenization with whole word masking, jawiki-20200831)", "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-char-v2", "project_name": "bert-base-japanese-char-v2", "downloads": 135630, "source": "Hugging Face", "score": 0.9107494632233621, "first_commit": "2021-03-05 04:05:08", "latest_commit": "2021-09-23 15:45:24", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Lightweight converter from Japanese Kana-kanji sentences into Kana-Roman.", "url": "https://github.com/miurahr/pykakasi", "project_name": "pykakasi", "stargazers_count": 430, "source": "GitHub", "score": 0.9001015892412253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Language Models", "Text Normalization" ] }, { "description": "Manga OCR: Optical character recognition for Japanese text, with the main focus being Japanese manga.", "url": "https://huggingface.co/kha-white/manga-ocr-base", "project_name": "manga-ocr-base", "downloads": 131243, "source": "Hugging Face", "score": 0.8796055988015411, "first_commit": "2022-01-15 17:39:06", "latest_commit": "2022-06-22 15:34:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "VisionEncoderDecoderModel", "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "A Japanese tokenizer based on recurrent neural networks", "url": "https://github.com/taishi-i/nagisa", "project_name": "nagisa", "stargazers_count": 397, "source": "GitHub", "score": 0.8066392624278529, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "This is a model for named entity recognition of Japanese medical documents.", "url": "https://huggingface.co/sociocom/MedNER-CR-JA", "project_name": "MedNER-CR-JA", "downloads": 117915, "source": "Hugging Face", "score": 0.7849884515450946, "first_commit": "2022-08-23 03:30:43", "latest_commit": "2024-07-31 07:44:00", "languages": [], "model_or_dataset": "model", "model_size": 0.11, "model_architectures": "BertForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Named Entity Recognition", "Tagging" ] }, { "description": "GitHub repository for the book 「大規模言語モデル入門」 (Introduction to Large Language Models; Gijutsu-Hyoronsha, 2023)", "url": "https://github.com/ghmagazine/llm-book", "project_name": "llm-book", "stargazers_count": 387, "source": "GitHub", "score": 0.7783173452116794, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "BERT base Japanese (character tokenization)", "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-char", "project_name": "bert-base-japanese-char", "downloads": 116350, "source": "Hugging Face", "score": 0.7738783198331844, "first_commit": "2020-04-28 21:34:05", "latest_commit": "2024-02-22 00:57:58", "languages": [], "model_or_dataset": "model", "model_size":
null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Juman++ (a Morphological Analyzer Toolkit)", "url": "https://github.com/ku-nlp/jumanpp", "project_name": "jumanpp", "stargazers_count": 385, "source": "GitHub", "score": 0.7726529617684448, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "テキストを壱百満天原サロメお嬢様風の口調に変換します", "url": "https://github.com/jiro4989/ojosama", "project_name": "ojosama", "stargazers_count": 383, "source": "GitHub", "score": 0.7669885783252101, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "BERT base Japanese (IPA dictionary, whole word masking enabled)", "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-whole-word-masking", "project_name": "bert-base-japanese-whole-word-masking", "downloads": 112699, "source": "Hugging Face", "score": 0.7479594119225237, "first_commit": "2020-04-28 21:34:35", "latest_commit": "2024-02-22 00:57:37", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "BERT base Japanese (character-level tokenization with whole word masking, CC-100 and jawiki-20230102)", "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-char-v3", "project_name": "bert-base-japanese-char-v3", "downloads": 112289, "source": "Hugging Face", "score": 0.7450487703877741, "first_commit": "2023-05-19 00:33:09", "latest_commit": "2023-05-19 00:39:44", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForPreTraining", "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese language data on kanji, radicals, media files, fonts and related resources from Kanji alive", "url": "https://github.com/kanjialive/kanji-data-media", "project_name": "kanji-data-media", "stargazers_count": 366, "source": "GitHub", "score": 0.7188413190577152, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "This is a Japanese sentence-BERT model.", "url": "https://huggingface.co/sonoisa/sentence-bert-base-ja-mean-tokens-v2", "project_name": "sentence-bert-base-ja-mean-tokens-v2", "downloads": 108498, "source": "Hugging Face", "score": 0.7181359848798817, "first_commit": "2021-12-14 11:18:19", "latest_commit": "2024-04-17 11:39:38", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "An input method without morphological analysis.", "url": "https://github.com/codefirst/aquaskk", "project_name": "aquaskk", "stargazers_count": 355, "source": "GitHub", "score": 0.6876872101199244, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text 
Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "vibrato: Viterbi-based accelerated tokenizer", "url": "https://github.com/daac-tools/vibrato", "project_name": "vibrato", "stargazers_count": 353, "source": "GitHub", "score": 0.6820228266766897, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "日本語で絵文字入力をするための IME 追加辞書 orange_book Google 日本語入力などで日本語から絵文字への変換を可能にする IME 拡張辞書", "url": "https://github.com/peaceiris/emoji-ime-dictionary", "project_name": "emoji-ime-dictionary", "stargazers_count": 350, "source": "GitHub", "score": 0.6735262515118376, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "SudachiPy 0.6* and above are developed as Sudachi.rs.", "url": "https://github.com/WorksApplications/sudachi.rs", "project_name": "sudachi.rs", "stargazers_count": 347, "source": "GitHub", "score": 0.6650296763469856, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Tagging", "Morphology", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Optical character recognition in manga images. Manga OCR desktop application", "url": "https://github.com/blueaxis/Poricom", "project_name": "Poricom", "stargazers_count": 346, "source": "GitHub", "score": 0.6621974846253682, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "NLP libary for creating pipeline components", "url": "https://github.com/PKSHATechnology-Research/camphr", "project_name": "camphr", "stargazers_count": 340, "source": "GitHub", "score": 0.6452043342956642, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "shisa-gamma-7b-v1 For more information see our main Shisa 7B model We applied a version of our fine-tune data set onto Japanese Stable LM Base Gamma 7B and it performed pretty well, just sharing since it might be of interest.", "url": "https://huggingface.co/augmxnt/shisa-gamma-7b-v1", "project_name": "shisa-gamma-7b-v1", "downloads": 97648, "source": "Hugging Face", "score": 0.6411104710944342, "first_commit": "2023-12-23 20:21:44", "latest_commit": "2024-05-19 06:07:36", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation" ] }, { "description": "Japanese to romaji converter in Python", "url": "https://github.com/polm/cutlet", "project_name": "cutlet", "stargazers_count": 331, "source": "GitHub", "score": 0.6197146088011081, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "Linguistic tools for texts in Japanese language", "url": "https://github.com/tshatrov/ichiran", "project_name": "ichiran", "stargazers_count": 328, "source": "GitHub", "score": 0.611218033636256, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", 
"multi_labels": [ "Syntactic Text Processing" ] }, { "description": "Pure-Python Japanese character interconverter for Hiragana, Katakana, Hankaku, and Zenkaku", "url": "https://github.com/ikegami-yukino/jaconv", "project_name": "jaconv", "stargazers_count": 325, "source": "GitHub", "score": 0.602721458471404, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "A free and openly licensed Japanese-to-English dictionary compatible with multiple dictionary clients", "url": "https://github.com/stephenmk/Jitendex", "project_name": "Jitendex", "stargazers_count": 325, "source": "GitHub", "score": 0.602721458471404, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "約100年に渡るアニメ作品リストデータベース", "url": "https://github.com/anilogia/animedb", "project_name": "animedb", "stargazers_count": 323, "source": "GitHub", "score": 0.5970570750281693, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Model Card for Japanese DeBERTa V2 tiny Model description", "url": "https://huggingface.co/ku-nlp/deberta-v2-tiny-japanese", "project_name": "deberta-v2-tiny-japanese", "downloads": 90484, "source": "Hugging Face", "score": 0.5902523346189553, "first_commit": "2023-01-18 13:36:09", "latest_commit": "2023-03-23 16:13:46", "languages": [], "model_or_dataset": "model", "model_size": 0.013900000000000001, "model_architectures": "DebertaV2ForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese GPT2 Generation Model", "url": "https://github.com/tanreinama/gpt2-japanese", "project_name": "gpt2-japanese", "stargazers_count": 316, "source": "GitHub", "score": 0.5772317329768478, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "The purpose of this repository is to make prototypes as case study in the context of proof of concept(PoC) and research and development(R&D) that I have written in my website. 
The main research topics are Auto-Encoders in relation to representation learning, statistical machine learning for energy-based models, adversarial generation net…", "url": "https://github.com/accel-brain/accel-brain-code", "project_name": "accel-brain-code", "stargazers_count": 314, "source": "GitHub", "score": 0.5715673495336132, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Representation Learning", "Robustness in NLP" ] }, { "description": "pdf-translator translates English PDF files into Japanese, preserving the original layout.", "url": "https://github.com/discus0434/pdf-translator", "project_name": "pdf-translator", "stargazers_count": 314, "source": "GitHub", "score": 0.5715673495336132, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation" ] }, { "description": "▽▼ SKK-like Japanese Input Method Editor for Windows", "url": "https://github.com/nathancorvussolis/corvusskk", "project_name": "corvusskk", "stargazers_count": 312, "source": "GitHub", "score": 0.5659029660903785, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "JGLUE: Japanese General Language Understanding Evaluation", "url": "https://github.com/yahoojapan/JGLUE", "project_name": "JGLUE", "stargazers_count": 311, "source": "GitHub", "score": 0.5630707743687612, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Language Models", "Semantic Text Processing", "Low-Resource NLP", "Explainability & Interpretability in NLP" ] }, { "description": "llm-book/bert-base-japanese-v3-ner-wikipedia-dataset: the named entity recognition model introduced in Chapter 6 of 「大規模言語モデル入門」 (Introduction to Large Language Models).", "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-ner-wikipedia-dataset", "project_name": "bert-base-japanese-v3-ner-wikipedia-dataset", "downloads": 84912, "source": "Hugging Face", "score": 0.5506960062491385, "first_commit": "2023-05-28 08:06:41", "latest_commit": "2023-07-25 13:32:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Named Entity Recognition", "Language Models", "Semantic Text Processing" ] }, { "description": "Building AI-based conversational avatars lightning fast", "url": "https://github.com/uezo/aiavatarkit", "project_name": "aiavatarkit", "stargazers_count": 306, "source": "GitHub", "score": 0.5489098157606744, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "This is a model for named entity recognition of Japanese medical documents.", "url": "https://huggingface.co/sociocom/MedNERN-CR-JA", "project_name": "MedNERN-CR-JA", "downloads": 79356, "source": "Hugging Face", "score": 0.5112532638904337, "first_commit": "2023-04-13 08:25:56", "latest_commit": "2024-02-26 13:53:06", "languages": [], "model_or_dataset": "model", "model_size": 0.11, "model_architectures": "BertForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Named
Entity Recognition", "Tagging" ] }, { "description": "Japanese Stable Diffusion is a Japanese specific latent text-to-image diffusion model capable of generating photo-realistic images given any text input.", "url": "https://github.com/rinnakk/japanese-stable-diffusion", "project_name": "japanese-stable-diffusion", "stargazers_count": 284, "source": "GitHub", "score": 0.48660159788509283, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "GLuCoSE (General Luke-based Contrastive Sentence Embedding)-base-Japanese 日本語のREADME/Japanese README GLuCoSE (General LUke-based COntrastive Sentence Embedding, \"glucose\") is a Japanese text embedding model based on LUKE.", "url": "https://huggingface.co/pkshatech/GLuCoSE-base-ja", "project_name": "GLuCoSE-base-ja", "downloads": 74140, "source": "Hugging Face", "score": 0.4742242242678628, "first_commit": "2023-07-16 07:28:46", "latest_commit": "2023-08-25 02:53:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeModel", "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "Japanese text normalizer for mecab-neologd", "url": "https://github.com/ikegami-yukino/neologdn", "project_name": "neologdn", "stargazers_count": 279, "source": "GitHub", "score": 0.4724406392770061, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "Restaurant Search System through Dialogue in Japanese.", "url": "https://github.com/Hironsan/HotPepperGourmetDialogue", "project_name": "HotPepperGourmetDialogue", "stargazers_count": 275, "source": "GitHub", "score": 0.4611118723905367, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Natural Language Interfaces", "Information Retrieval", "Dialogue Systems & Conversational Agents" ] }, { "description": "Tacotron2 implementation of Japanese", "url": "https://github.com/CjangCjengh/tacotron2-japanese", "project_name": "tacotron2-japanese", "stargazers_count": 268, "source": "GitHub", "score": 0.4412865303392153, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multimodality" ] }, { "description": "「BERTによる自然言語処理入門: Transformersを使った実践プログラミング」サポートページ", "url": "https://github.com/stockmarkteam/bert-book", "project_name": "bert-book", "stargazers_count": 262, "source": "GitHub", "score": 0.42429338000951117, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "This repository is for building Windows 64-bit MeCab binary and improving MeCab Python binding.", "url": "https://github.com/ikegami-yukino/mecab", "project_name": "mecab", "stargazers_count": 255, "source": "GitHub", "score": 0.40446803795818975, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "DeepSeek-V3-slice-jp64 本モデルは DeepSeek-V3 をベースに、日本語の例文を元に頻出する MoE (Mixture of Experts) の各レイヤーごとのexpertsを厳選して再構成したモデルのgguf版です。", "url": "https://huggingface.co/mmnga/DeepSeek-V3-slice-jp64-gguf", "project_name": "DeepSeek-V3-slice-jp64-gguf", 
"downloads": 62014, "source": "Hugging Face", "score": 0.3881402260962189, "first_commit": "2025-01-05 14:11:41", "latest_commit": "2025-01-05 15:26:26", "languages": [], "model_or_dataset": "model", "model_size": 181.0, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "A lexicon for Sudachi", "url": "https://github.com/WorksApplications/SudachiDict", "project_name": "SudachiDict", "stargazers_count": 246, "source": "GitHub", "score": 0.37897831246363367, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "A tool for dividing the Japanese full name into a family name and a given name.", "url": "https://github.com/rskmoi/namedivider-python", "project_name": "namedivider-python", "stargazers_count": 245, "source": "GitHub", "score": 0.3761461207420163, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing" ] }, { "description": "Konoha: Simple wrapper of Japanese Tokenizers", "url": "https://github.com/himkt/konoha", "project_name": "konoha", "stargazers_count": 242, "source": "GitHub", "score": 0.3676495455771643, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "Visualization Module for Natural Language Processing", "url": "https://github.com/takapy0210/nlplot", "project_name": "nlplot", "stargazers_count": 240, "source": "GitHub", "score": 0.36198516213392956, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "Code for evaluating Japanese pretrained models provided by NTT Ltd.", "url": "https://github.com/nttcslab/japanese-dialog-transformers", "project_name": "japanese-dialog-transformers", "stargazers_count": 240, "source": "GitHub", "score": 0.36198516213392956, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Responsible & Trustworthy NLP", "Language Models", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "Vaporetto: Very Accelerated POintwise pREdicTion based TOkenizer", "url": "https://github.com/daac-tools/vaporetto", "project_name": "vaporetto", "stargazers_count": 237, "source": "GitHub", "score": 0.35348858696907753, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "FuguMT", "url": "https://huggingface.co/staka/fugumt-en-ja", "project_name": "fugumt-en-ja", "downloads": 56998, "source": "Hugging Face", "score": 0.3525310116125503, "first_commit": "2022-05-08 04:23:57", "latest_commit": "2023-08-15 17:45:04", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MarianMTModel", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models", "Semantic Text Processing" ] }, { "description": "日本語OCR", "url": "https://github.com/tanreinama/OCR_Japanease", "project_name": "OCR_Japanease", "stargazers_count": 234, "source": "GitHub", "score": 0.3449920118042255, "first_commit": null, 
"latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Using Vim as an input method for X11 apps", "url": "https://github.com/algon-320/vime", "project_name": "vime", "stargazers_count": 231, "source": "GitHub", "score": 0.3364954366393735, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "FuguMT", "url": "https://huggingface.co/staka/fugumt-ja-en", "project_name": "fugumt-ja-en", "downloads": 54486, "source": "Hugging Face", "score": 0.3346980078679379, "first_commit": "2022-05-08 04:32:09", "latest_commit": "2023-08-15 17:40:58", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MarianMTModel", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models", "Semantic Text Processing" ] }, { "description": "This is a Japanese sentence-BERT model.", "url": "https://huggingface.co/sonoisa/sentence-bert-base-ja-mean-tokens", "project_name": "sentence-bert-base-ja-mean-tokens", "downloads": 50918, "source": "Hugging Face", "score": 0.30936832738992165, "first_commit": "2021-07-22 06:11:37", "latest_commit": "2024-04-17 11:40:03", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "J-Moshi: A Japanese Full-duplex Spoken Dialogue System", "url": "https://github.com/nu-dialogue/j-moshi", "project_name": "j-moshi", "stargazers_count": 221, "source": "GitHub", "score": 0.3081735194232, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "JMdict and JMnedict in JSON format", "url": "https://github.com/scriptin/jmdict-simplified", "project_name": "jmdict-simplified", "stargazers_count": 220, "source": "GitHub", "score": 0.3053413277015827, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Yet another Japanese IME for IBus/Linux", "url": "https://github.com/akaza-im/akaza", "project_name": "akaza", "stargazers_count": 217, "source": "GitHub", "score": 0.29684475253673065, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "JTubeSpeech: Corpus of Japanese speech collected from YouTube", "url": "https://github.com/sarulab-speech/jtubespeech", "project_name": "jtubespeech", "stargazers_count": 217, "source": "GitHub", "score": 0.29684475253673065, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Python wrapper for OpenJTalk", "url": "https://github.com/r9y9/pyopenjtalk", "project_name": "pyopenjtalk", "stargazers_count": 215, "source": "GitHub", "score": 0.2911803690934959, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": 
null, "multi_labels": [ "Natural Language Interfaces", "Syntactic Text Processing", "Dialogue Systems & Conversational Agents" ] }, { "description": "A fast implementation of the Aho-Corasick algorithm using the compact double-array data structure in Rust.", "url": "https://github.com/daac-tools/daachorse", "project_name": "daachorse", "stargazers_count": 213, "source": "GitHub", "score": 0.28551598565026126, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Retrieval", "Indexing" ] }, { "description": "English-Japanese Dictionary data (Public Domain) EJDict-hand", "url": "https://github.com/kujirahand/EJDict", "project_name": "EJDict", "stargazers_count": 211, "source": "GitHub", "score": 0.27985160220702654, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Ruri: Japanese General Text Embeddings Usage First install the Sentence Transformers library: pip install -U sentence-transformers fugashi sentencepiece unidic-lite Then you can load this model and run inference.", "url": "https://huggingface.co/cl-nagoya/ruri-small-v2", "project_name": "ruri-small-v2", "downloads": 46496, "source": "Hugging Face", "score": 0.2779759935687927, "first_commit": "2024-12-05 01:25:14", "latest_commit": "2025-03-16 14:46:08", "languages": [], "model_or_dataset": "model", "model_size": 0.0681, "model_architectures": "DistilBertModel", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese postal code data.", "url": "https://github.com/polm/posuto", "project_name": "posuto", "stargazers_count": 210, "source": "GitHub", "score": 0.2770194104854092, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "The Kyoto Text Analysis Toolkit for word segmentation and pronunciation estimation, etc.", "url": "https://github.com/neubig/kytea", "project_name": "kytea", "stargazers_count": 207, "source": "GitHub", "score": 0.26852283532055715, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "Tutorial to train fastText with Japanese corpus", "url": "https://github.com/icoxfog417/fastTextJapaneseTutorial", "project_name": "fastTextJapaneseTutorial", "stargazers_count": 205, "source": "GitHub", "score": 0.2628584518773225, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Information Retrieval", "Text Classification" ] }, { "description": "ITAコーパスの文章リスト", "url": "https://github.com/mmorise/ita-corpus", "project_name": "ita-corpus", "stargazers_count": 204, "source": "GitHub", "score": 0.2600262601557051, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "The Japanese analysis plugin for elasticsearch", "url": "https://github.com/worksapplications/elasticsearch-sudachi", "project_name": "elasticsearch-sudachi", "stargazers_count": 197, "source": "GitHub", "score": 0.2402009181043837, "first_commit": null, "latest_commit": null, 
"languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Information Retrieval" ] }, { "description": "Java library for identifying Japanese characters from images", "url": "https://github.com/sakarika/kanjitomo-ocr", "project_name": "kanjitomo-ocr", "stargazers_count": 192, "source": "GitHub", "score": 0.22603995949629696, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Visual Data in NLP", "Multimodality" ] }, { "description": "Model Card for Japanese DeBERTa V2 base Model description This is a Japanese DeBERTa V2 base model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/ku-nlp/deberta-v2-base-japanese", "project_name": "deberta-v2-base-japanese", "downloads": 38889, "source": "Hugging Face", "score": 0.22397294441064527, "first_commit": "2023-01-05 08:04:14", "latest_commit": "2023-05-12 14:13:03", "languages": [], "model_or_dataset": "model", "model_size": 0.137, "model_architectures": "DebertaV2ForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Sentence boundary disambiguation tool for Japanese texts (日本語文境界判定器)", "url": "https://github.com/megagonlabs/bunkai", "project_name": "bunkai", "stargazers_count": 189, "source": "GitHub", "score": 0.21754338433144493, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "Dataset introduced in the paper \"Towards Fully Automated Manga Translation\" presented in AAAI21", "url": "https://github.com/mantra-inc/open-mantra-dataset", "project_name": "open-mantra-dataset", "stargazers_count": 189, "source": "GitHub", "score": 0.21754338433144493, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation" ] }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-base-long", "project_name": "t5-base-long", "downloads": 37753, "source": "Hugging Face", "score": 0.21590833762168046, "first_commit": "2023-04-26 08:30:59", "latest_commit": "2023-05-10 10:00:00", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Topologically ordered lists of kanji for effective learning", "url": "https://github.com/scriptin/topokanji", "project_name": "topokanji", "stargazers_count": 186, "source": "GitHub", "score": 0.2090468091665929, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [] }, { "description": "ディープラーニングモデルの性能を体系的に最大化するためのプレイブック", "url": "https://github.com/Valkyrja3607/tuning_playbook_ja", "project_name": "tuning_playbook_ja", "stargazers_count": 184, "source": "GitHub", "score": 0.2033824257233582, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Can neural networks transliterate Romaji into Japanese correctly?", "url": "https://github.com/Kyubyong/neural_japanese_transliterator", "project_name": "neural_japanese_transliterator", 
"stargazers_count": 178, "source": "GitHub", "score": 0.18638927539365413, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Representation Learning", "Text Generation" ] }, { "description": "A free online, self-hostable, multilang Japanese dictionary.", "url": "https://github.com/WeDontPanic/Jotoba", "project_name": "Jotoba", "stargazers_count": 177, "source": "GitHub", "score": 0.18355708367203677, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "日本語における不適切表現を収集します。自然言語処理の時のデータクリーニング用等に使えると思います。", "url": "https://github.com/MosasoM/inappropriate-words-ja", "project_name": "inappropriate-words-ja", "stargazers_count": 176, "source": "GitHub", "score": 0.18072489195041944, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "JapaneseEmbeddingEval", "url": "https://github.com/oshizo/JapaneseEmbeddingEval", "project_name": "JapaneseEmbeddingEval", "stargazers_count": 175, "source": "GitHub", "score": 0.1778927002288021, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Semantic Text Processing" ] }, { "description": "Summarize arXiv paper with figures", "url": "https://github.com/rkmt/summarize_arxv", "project_name": "summarize_arxv", "stargazers_count": 174, "source": "GitHub", "score": 0.17506050850718474, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining" ] }, { "description": "Phishing URL dataset from JPCERT/CC", "url": "https://github.com/JPCERTCC/phishurl-list", "project_name": "phishurl-list", "stargazers_count": 172, "source": "GitHub", "score": 0.16939612506395005, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "国会議案データベース:衆議院", "url": "https://github.com/smartnews-smri/house-of-representatives", "project_name": "house-of-representatives", "stargazers_count": 171, "source": "GitHub", "score": 0.16656393334233272, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "A JSON kanji dataset with updated JLPT levels and WaniKani information", "url": "https://github.com/davidluzgouveia/kanji-data", "project_name": "kanji-data", "stargazers_count": 170, "source": "GitHub", "score": 0.16373174162071535, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "PLaMo-Embedding-1B 日本語版のREADME/Japanese README Model Overview PLaMo-Embedding-1B is a Japanese text embedding model developed by Preferred Networks, Inc.", "url": "https://huggingface.co/pfnet/plamo-embedding-1b", "project_name": "plamo-embedding-1b", "downloads": 29763, "source": "Hugging Face", "score": 0.15918632332253524, "first_commit": "2025-04-11 08:43:31", "latest_commit": "2025-04-17 11:17:46", "languages": [], "model_or_dataset": "model", "model_size": 1.05, "model_architectures": "PlamoBiModel", "multi_labels": 
[ "Representation Learning", "Semantic Text Processing" ] }, { "description": "このモデルは何? ", "url": "https://huggingface.co/Lasorco/lametta", "project_name": "lametta", "downloads": 27883, "source": "Hugging Face", "score": 0.14583996701685403, "first_commit": "2023-03-28 14:29:55", "latest_commit": "2023-11-08 07:37:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "A Japanese DistilBERT pretrained model, which was trained on Wikipedia.", "url": "https://github.com/BandaiNamcoResearchInc/DistilBERT-base-jp", "project_name": "DistilBERT-base-jp", "stargazers_count": 161, "source": "GitHub", "score": 0.13824201612615925, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Responsible & Trustworthy NLP", "Representation Learning", "Language Models", "Green & Sustainable NLP", "Semantic Text Processing" ] }, { "description": "Llama-3-ELYZA-JP-8B Model Description Llama-3-ELYZA-JP-8B is a large language model trained by ELYZA, Inc.", "url": "https://huggingface.co/elyza/Llama-3-ELYZA-JP-8B", "project_name": "Llama-3-ELYZA-JP-8B", "downloads": 26764, "source": "Hugging Face", "score": 0.13789604536469588, "first_commit": "2024-06-25 06:32:13", "latest_commit": "2024-06-26 02:56:23", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "rinna/japanese-clip-vit-b-16", "url": "https://huggingface.co/rinna/japanese-clip-vit-b-16", "project_name": "japanese-clip-vit-b-16", "downloads": 26580, "source": "Hugging Face", "score": 0.1365898062369058, "first_commit": "2022-04-27 07:52:33", "latest_commit": "2024-07-20 08:42:32", "languages": [], "model_or_dataset": "model", "model_size": 0.197, "model_architectures": "CLIPModel", "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "Japanese word embedding with Sudachi and NWJC", "url": "https://github.com/WorksApplications/chiVe", "project_name": "chiVe", "stargazers_count": 160, "source": "GitHub", "score": 0.1354098244045419, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "WRIME: 主観と客観の感情分析データセット", "url": "https://github.com/ids-cv/wrime", "project_name": "wrime", "stargazers_count": 160, "source": "GitHub", "score": 0.1354098244045419, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "GLuCoSE v2", "url": "https://huggingface.co/pkshatech/GLuCoSE-base-ja-v2", "project_name": "GLuCoSE-base-ja-v2", "downloads": 25414, "source": "Hugging Face", "score": 0.12831222567710565, "first_commit": "2024-08-22 03:16:48", "latest_commit": "2024-09-18 09:21:54", "languages": [], "model_or_dataset": "model", "model_size": 0.133, "model_architectures": "LukeModel", "multi_labels": [ "Representation Learning", "Information Retrieval", "Semantic Text Processing" ] }, { "description": "Japanese dictation kit using Julius", "url": "https://github.com/julius-speech/dictation-kit", "project_name": "dictation-kit", "stargazers_count": 157, "source": "GitHub", "score": 0.12691324923968986, "first_commit": null, 
"latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "GIS & Archaeological Simulator. 2023 in development.", "url": "https://github.com/AsPJT/PAX_SAPIENTICA", "project_name": "PAX_SAPIENTICA", "stargazers_count": 157, "source": "GitHub", "score": 0.12691324923968986, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Information Extraction & Text Mining" ] }, { "description": "Sarashina-Embedding-v1-1B 日本語のREADME/Japanese README \"Sarashina-Embedding-v1-1B\" is a Japanese text embedding model, based on the 1.2B-parameter Japanese LLM \"Sarashina2.1-1B\".", "url": "https://huggingface.co/sbintuitions/sarashina-embedding-v1-1b", "project_name": "sarashina-embedding-v1-1b", "downloads": 23902, "source": "Hugging Face", "score": 0.11757834762700457, "first_commit": "2024-11-22 05:27:37", "latest_commit": "2025-01-31 00:56:25", "languages": [], "model_or_dataset": "model", "model_size": 1.22, "model_architectures": "LlamaModel", "multi_labels": [ "Representation Learning", "Semantic Similarity", "Semantic Text Processing" ] }, { "description": "Kotoba-Whisper-v2.2 Kotoba-Whisper-v2.2 is a Japanese ASR model based on kotoba-tech/kotoba-whisper-v2.0, with additional postprocessing stacks integrated as pipeline.", "url": "https://huggingface.co/kotoba-tech/kotoba-whisper-v2.2", "project_name": "kotoba-whisper-v2.2", "downloads": 22890, "source": "Hugging Face", "score": 0.11039403242415914, "first_commit": "2024-10-18 14:56:36", "latest_commit": "2024-10-23 00:49:36", "languages": [], "model_or_dataset": "model", "model_size": 0.756, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [ "Language Models", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "Japanese negative positive classification.日本語文書のネガポジを判定。", "url": "https://github.com/liaoziyang/negapoji", "project_name": "negapoji", "stargazers_count": 151, "source": "GitHub", "score": 0.10992009890998579, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Information Retrieval", "Text Classification" ] }, { "description": "japanese-gpt2-xsmall", "url": "https://huggingface.co/rinna/japanese-gpt2-xsmall", "project_name": "japanese-gpt2-xsmall", "downloads": 22518, "source": "Hugging Face", "score": 0.10775315766580094, "first_commit": "2021-07-26 02:52:54", "latest_commit": "2024-07-20 07:48:11", "languages": [], "model_or_dataset": "model", "model_size": 0.0437, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "JP Language Model Evaluation Harness", "url": "https://github.com/Stability-AI/lm-evaluation-harness/tree/jp-stable", "project_name": "jp-stable", "stargazers_count": 150, "source": "GitHub", "score": 0.10708790718836844, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese sentiment analyzer implemented in Python.", "url": "https://github.com/Hironsan/asari", "project_name": "asari", "stargazers_count": 148, "source": "GitHub", "score": 0.10142352374513375, "first_commit": null, "latest_commit": null, 
"languages": [], "model_or_dataset": null, "multi_labels": [ "Sentiment Analysis" ] }, { "description": "A fast converter between Japanese hankaku and zenkaku characters", "url": "https://github.com/studio-ousia/mojimoji", "project_name": "mojimoji", "stargazers_count": 146, "source": "GitHub", "score": 0.09575914030189905, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "Shell command launcher with natural language", "url": "https://github.com/hirokidaichi/wanna", "project_name": "wanna", "stargazers_count": 145, "source": "GitHub", "score": 0.0929269485802817, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "bert-finetuned-japanese-sentiment This model is a fine-tuned version of cl-tohoku/bert-base-japanese-v2 on product amazon reviews japanese dataset.", "url": "https://huggingface.co/christian-phu/bert-finetuned-japanese-sentiment", "project_name": "bert-finetuned-japanese-sentiment", "downloads": 20414, "source": "Hugging Face", "score": 0.0928165972045492, "first_commit": "2023-04-06 16:43:51", "latest_commit": "2023-04-07 17:27:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing", "Sentiment Analysis" ] }, { "description": "japanese-gpt2-medium This repository provides a medium-sized Japanese GPT-2 model.", "url": "https://huggingface.co/rinna/japanese-gpt2-medium", "project_name": "japanese-gpt2-medium", "downloads": 20363, "source": "Hugging Face", "score": 0.09245454179412911, "first_commit": "2021-04-05 02:01:26", "latest_commit": "2024-07-20 07:50:47", "languages": [], "model_or_dataset": "model", "model_size": 0.361, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "日本語に翻訳したStanford Alpacaのデータセットを用いてLLaMAをファインチューニングし作成したLow-Rank AdapterのリンクとGenerateサンプルコード", "url": "https://github.com/kunishou/Japanese-Alpaca-LoRA", "project_name": "Japanese-Alpaca-LoRA", "stargazers_count": 142, "source": "GitHub", "score": 0.08443037341542968, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [] }, { "description": "LaBSE Model description Language-agnostic BERT Sentence Encoder (LaBSE) is a BERT-based model trained for sentence embedding for 109 languages.", "url": "https://huggingface.co/setu4993/LaBSE", "project_name": "LaBSE", "downloads": 18909, "source": "Hugging Face", "score": 0.0821324130343097, "first_commit": "2021-01-11 06:06:51", "latest_commit": "2023-10-18 23:23:16", "languages": [], "model_or_dataset": "model", "model_size": 0.47100000000000003, "model_architectures": "BertModel", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Model Card for Japanese character-level DeBERTa V2 large Model description This is a Japanese DeBERTa V2 large model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/ku-nlp/deberta-v2-large-japanese-char-wwm", "project_name": "deberta-v2-large-japanese-char-wwm", "downloads": 18899, 
"source": "Hugging Face", "score": 0.08206142177736458, "first_commit": "2023-03-09 10:13:05", "latest_commit": "2023-09-15 03:48:28", "languages": [], "model_or_dataset": "model", "model_size": 0.33, "model_architectures": "DebertaV2ForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "chakki's Aspect-Based Sentiment Analysis dataset", "url": "https://github.com/chakki-works/chABSA-dataset", "project_name": "chABSA-dataset", "stargazers_count": 140, "source": "GitHub", "score": 0.07876598997219499, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Aspect-based Sentiment Analysis", "Sentiment Analysis" ] }, { "description": "Llama 3.1 Swallow - Built with Llama Llama 3.1 Swallow is a series of large language models (8B, 70B) that were built by continual pre-training on the Meta Llama 3.1 models.", "url": "https://huggingface.co/tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.3", "project_name": "Llama-3.1-Swallow-8B-Instruct-v0.3", "downloads": 18145, "source": "Hugging Face", "score": 0.07670868100370307, "first_commit": "2024-12-18 04:31:10", "latest_commit": "2025-01-27 04:00:42", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "自然言語で書かれた時間情報表現を抽出/規格化するルールベースの解析器", "url": "https://github.com/yagays/ja-timex", "project_name": "ja-timex", "stargazers_count": 139, "source": "GitHub", "score": 0.07593379825057764, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Kanji usage frequency data collected from various sources", "url": "https://github.com/scriptin/kanji-frequency", "project_name": "kanji-frequency", "stargazers_count": 139, "source": "GitHub", "score": 0.07593379825057764, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Information Extraction & Text Mining" ] }, { "description": "A set of metrics for feature selection from text data", "url": "https://github.com/Kensuke-Mitsuzawa/JapaneseTokenizers", "project_name": "JapaneseTokenizers", "stargazers_count": 138, "source": "GitHub", "score": 0.07310160652896029, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Text Generation", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Python 3 library for manipulating Jim Breen's JMdict, KanjiDic2, JMnedict and kanji-radical mappings", "url": "https://github.com/neocl/jamdict", "project_name": "jamdict", "stargazers_count": 137, "source": "GitHub", "score": 0.07026941480734294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Wikipediaを用いた日本語の固有表現抽出データセット", "url": "https://github.com/stockmarkteam/ner-wikipedia-dataset", "project_name": "ner-wikipedia-dataset", "stargazers_count": 136, "source": "GitHub", "score": 0.0674372230857256, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Information Extraction & Text Mining", 
"Coreference Resolution", "Named Entity Recognition", "Annotation and Dataset Development" ] }, { "description": "Llama 3.1 Swallow - Built with Llama Llama 3.1 Swallow is a series of large language models (8B, 70B) that were built by continual pre-training on the Meta Llama 3.1 models.", "url": "https://huggingface.co/tokyotech-llm/Llama-3.1-Swallow-8B-v0.1", "project_name": "Llama-3.1-Swallow-8B-v0.1", "downloads": 16575, "source": "Hugging Face", "score": 0.06556305366332034, "first_commit": "2024-08-16 03:32:28", "latest_commit": "2024-10-08 14:15:23", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "CJK computer science terms comparison / 中日韓電腦科學術語對照 / 日中韓のコンピュータ科学の用語対照 / 한·중·일 전산학 용어 대조", "url": "https://github.com/dahlia/cjk-compsci-terms", "project_name": "cjk-compsci-terms", "stargazers_count": 135, "source": "GitHub", "score": 0.06460503136410825, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "sbintuitions/sarashina2.2-3b-instruct-v0.1 Model Summary", "url": "https://huggingface.co/sbintuitions/sarashina2.2-3b-instruct-v0.1", "project_name": "sarashina2.2-3b-instruct-v0.1", "downloads": 15940, "source": "Hugging Face", "score": 0.061055108847305674, "first_commit": "2025-02-26 02:09:06", "latest_commit": "2025-03-05 07:01:29", "languages": [], "model_or_dataset": "model", "model_size": 3.36, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models", "Explainability & Interpretability in NLP" ] }, { "description": "Node.js module for converting Japanese Hiragana and Katakana script to, and from, Romaji using Hepburn romanisation", "url": "https://github.com/lovell/hepburn", "project_name": "hepburn", "stargazers_count": 132, "source": "GitHub", "score": 0.05610845619925622, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Syntactic Text Processing", "Text Normalization" ] }, { "description": "Llama 3.1 Swallow - Built with Llama Llama 3.1 Swallow is a series of large language models (8B, 70B) that were built by continual pre-training on the Meta Llama 3.1 models.", "url": "https://huggingface.co/tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.2", "project_name": "Llama-3.1-Swallow-8B-Instruct-v0.2", "downloads": 14967, "source": "Hugging Face", "score": 0.0541476595465462, "first_commit": "2024-10-30 23:43:29", "latest_commit": "2025-01-27 04:24:45", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "japanese-gpt-neox-3.6b Overview This repository provides a Japanese GPT-NeoX model of 3.6 billion parameters.", "url": "https://huggingface.co/rinna/japanese-gpt-neox-3.6b", "project_name": "japanese-gpt-neox-3.6b", "downloads": 14895, "source": "Hugging Face", "score": 0.05363652249654138, "first_commit": "2023-05-17 02:16:45", "latest_commit": "2024-07-20 07:55:19", "languages": [], "model_or_dataset": "model", "model_size": 3.76, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "A 
unified language analyzer for Japanese", "url": "https://github.com/ku-nlp/kwja", "project_name": "kwja", "stargazers_count": 131, "source": "GitHub", "score": 0.05327626447763887, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing" ] }, { "description": "A curated list of open data resources in Japan", "url": "https://github.com/japan-opendata/awesome-japan-opendata", "project_name": "awesome-japan-opendata", "stargazers_count": 131, "source": "GitHub", "score": 0.05327626447763887, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "A list of pre-trained BERT models for Japanese with word/subword tokenization + vocabulary construction algorithm information", "url": "https://github.com/himkt/awesome-bert-japanese", "project_name": "awesome-bert-japanese", "stargazers_count": 130, "source": "GitHub", "score": 0.05044407275602152, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Tanuki-8x8B-dpo-v1.0-GPTQ-4bit Overview A GPTQ 4-bit quantized version of weblab-GENIAC/Tanuki-8x8B-dpo-v1.0, an LLM developed under the GENIAC Matsuo Lab LLM development project.", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-GPTQ-4bit", "project_name": "Tanuki-8x8B-dpo-v1.0-GPTQ-4bit", "downloads": 14385, "source": "Hugging Face", "score": 0.050015968392340626, "first_commit": "2024-08-27 18:19:13", "latest_commit": "2024-09-03 09:27:14", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "TanukiForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "clip-japanese-base This is a Japanese CLIP (Contrastive Language-Image Pre-training) model developed by LY Corporation.", "url": "https://huggingface.co/line-corporation/clip-japanese-base", "project_name": "clip-japanese-base", "downloads": 14224, "source": "Hugging Face", "score": 0.04887300915552431, "first_commit": "2024-04-24 01:36:22", "latest_commit": "2024-05-10 03:07:04", "languages": [], "model_or_dataset": "model", "model_size": 0.197, "model_architectures": "CLYPModel", "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "Japanese T5 pretrained model This is a T5 (Text-to-Text Transfer Transformer) model pretrained on a Japanese corpus. 
", "url": "https://huggingface.co/sonoisa/t5-base-japanese", "project_name": "t5-base-japanese", "downloads": 13902, "source": "Hugging Face", "score": 0.04658709068189167, "first_commit": "2021-03-28 10:54:32", "latest_commit": "2022-07-31 08:20:41", "languages": [], "model_or_dataset": "model", "model_size": 0.223, "model_architectures": "T5Model", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "AXCXEPT様の AXCXEPT/EZO-gemma-2-2b-jpn-it をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/EZO-gemma-2-2b-jpn-it-GGUF", "project_name": "EZO-gemma-2-2b-jpn-it-GGUF", "downloads": 13791, "source": "Hugging Face", "score": 0.045799087729800916, "first_commit": "2024-10-04 11:23:04", "latest_commit": "2024-10-04 13:31:46", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "deep-learning-with-pytorchの日本語版repositoryです。", "url": "https://github.com/Gin5050/deep-learning-with-pytorch-ja", "project_name": "deep-learning-with-pytorch-ja", "stargazers_count": 128, "source": "GitHub", "score": 0.04477968931278683, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "BERT base Japanese (unidic-lite with whole word masking, jawiki-20200831)", "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-v2", "project_name": "bert-base-japanese-v2", "downloads": 13200, "source": "Hugging Face", "score": 0.04160350444434475, "first_commit": "2021-03-05 03:37:30", "latest_commit": "2021-09-23 15:45:31", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Simple python API to read annotation data of Manga109", "url": "https://github.com/manga109/manga109api", "project_name": "manga109api", "stargazers_count": 126, "source": "GitHub", "score": 0.039115305869552135, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Tagging", "Morphology" ] }, { "description": "Ruri: Japanese General Text Embeddings Usage First install the Sentence Transformers library: pip install -U sentence-transformers fugashi sentencepiece unidic-lite Then you can load this model and run inference.", "url": "https://huggingface.co/cl-nagoya/ruri-base-v2", "project_name": "ruri-base-v2", "downloads": 12781, "source": "Hugging Face", "score": 0.03862897077834451, "first_commit": "2024-12-05 01:25:34", "latest_commit": "2025-03-17 02:28:01", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertModel", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "hotchpotch/japanese-reranker-cross-encoder-xsmall-v1 日本語で学習させた Reranker (CrossEncoder) シリーズです。 ", "url": "https://huggingface.co/hotchpotch/japanese-reranker-cross-encoder-xsmall-v1", "project_name": "japanese-reranker-cross-encoder-xsmall-v1", "downloads": 12640, "source": "Hugging Face", "score": 0.037627994055418425, "first_commit": "2024-03-28 04:29:26", "latest_commit": "2024-06-10 03:57:05", "languages": [], 
"model_or_dataset": "model", "model_size": 0.107, "model_architectures": "XLMRobertaForSequenceClassification", "multi_labels": [ "Language Models" ] }, { "description": "LINE DistilBERT Japanese This is a DistilBERT model pre-trained on 131 GB of Japanese web text.", "url": "https://huggingface.co/line-corporation/line-distilbert-base-japanese", "project_name": "line-distilbert-base-japanese", "downloads": 12552, "source": "Hugging Face", "score": 0.03700327099430143, "first_commit": "2023-03-10 10:23:54", "latest_commit": "2023-12-01 09:50:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DistilBertForMaskedLM", "multi_labels": [ "Representation Learning", "Language Models", "Green & Sustainable NLP", "Semantic Text Processing" ] }, { "description": "このツールは、複数のデータセットを横断して日本語の大規模言語モデルを自動評価するものです.", "url": "https://github.com/llm-jp/llm-jp-eval", "project_name": "llm-jp-eval", "stargazers_count": 125, "source": "GitHub", "score": 0.036283114147934795, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "japanese-gpt-neox-3.6b-instruction-sft Overview This repository provides a Japanese GPT-NeoX model of 3.6 billion parameters.", "url": "https://huggingface.co/rinna/japanese-gpt-neox-3.6b-instruction-sft", "project_name": "japanese-gpt-neox-3.6b-instruction-sft", "downloads": 11827, "source": "Hugging Face", "score": 0.031856404865780744, "first_commit": "2023-05-17 02:16:28", "latest_commit": "2024-07-20 07:56:34", "languages": [], "model_or_dataset": "model", "model_size": 3.76, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents", "Language Models", "Semantic Text Processing" ] }, { "description": "Ruri: Japanese General Text Embeddings Usage Direct Usage (Sentence Transformers)", "url": "https://huggingface.co/cl-nagoya/ruri-small", "project_name": "ruri-small", "downloads": 11754, "source": "Hugging Face", "score": 0.03133816869008142, "first_commit": "2024-08-28 16:23:12", "latest_commit": "2024-09-04 08:49:30", "languages": [], "model_or_dataset": "model", "model_size": 0.0681, "model_architectures": "DistilBertModel", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "複数の前処理を構成して管理するテキスト前処理ツール", "url": "https://github.com/HojiChar/HojiChar", "project_name": "HojiChar", "stargazers_count": 121, "source": "GitHub", "score": 0.024954347261465407, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "A comparison tool of Japanese tokenizers", "url": "https://github.com/taishi-i/toiro", "project_name": "toiro", "stargazers_count": 121, "source": "GitHub", "score": 0.024954347261465407, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "[Llama-3.1-8B-EZO-1.1-it] Model Card モデル情報 / Model Information このモデルは、Meta AI の Llama 3.1 をベースに、日本語タスクでの性能を向上させるためにファインチューニングを行ったものです。 ", "url": "https://huggingface.co/HODACHI/Llama-3.1-8B-EZO-1.1-it", "project_name": "Llama-3.1-8B-EZO-1.1-it", "downloads": 10197, 
"source": "Hugging Face", "score": 0.02028482998372734, "first_commit": "2024-07-28 23:24:59", "latest_commit": "2024-08-04 06:16:48", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "japanese-gpt2-small This repository provides a small-sized Japanese GPT-2 model.", "url": "https://huggingface.co/rinna/japanese-gpt2-small", "project_name": "japanese-gpt2-small", "downloads": 10145, "source": "Hugging Face", "score": 0.019915675447612753, "first_commit": "2021-06-15 06:32:27", "latest_commit": "2024-07-20 07:49:31", "languages": [], "model_or_dataset": "model", "model_size": 0.123, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "A desktop language immersion companion for learners of Japanese", "url": "https://github.com/fauu/Kamite", "project_name": "Kamite", "stargazers_count": 118, "source": "GitHub", "score": 0.01645777209661337, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "japanese-roberta-base This repository provides a base-sized Japanese RoBERTa model.", "url": "https://huggingface.co/rinna/japanese-roberta-base", "project_name": "japanese-roberta-base", "downloads": 9542, "source": "Hugging Face", "score": 0.015634902653822443, "first_commit": "2021-06-11 02:56:39", "latest_commit": "2024-07-20 07:44:40", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "General-purpose Swich transformer based Japanese language mode", "url": "https://github.com/tanreinama/GPTSAN", "project_name": "GPTSAN", "stargazers_count": 117, "source": "GitHub", "score": 0.013625580374996024, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "BERT large Japanese (unidic-lite with whole word masking, CC-100 and jawiki-20230102)", "url": "https://huggingface.co/tohoku-nlp/bert-large-japanese-v2", "project_name": "bert-large-japanese-v2", "downloads": 9098, "source": "Hugging Face", "score": 0.012482890845459432, "first_commit": "2023-05-19 00:40:35", "latest_commit": "2023-05-19 00:47:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForPreTraining", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "sbert-jsnli-luke-japanese-base-lite This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search.", "url": "https://huggingface.co/oshizo/sbert-jsnli-luke-japanese-base-lite", "project_name": "sbert-jsnli-luke-japanese-base-lite", "downloads": 9079, "source": "Hugging Face", "score": 0.012348007457263718, "first_commit": "2023-01-10 11:53:15", "latest_commit": "2023-01-10 12:36:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeModel", "multi_labels": [ "Representation Learning", 
"Language Models", "Semantic Text Processing" ] }, { "description": "lists of text corpus and more (mainly Japanese)", "url": "https://github.com/ikegami-yukino/dataset-list", "project_name": "dataset-list", "stargazers_count": 116, "source": "GitHub", "score": 0.010793388653378679, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Emotion analyzer for Japanese text", "url": "https://github.com/ikegami-yukino/pymlask", "project_name": "pymlask", "stargazers_count": 115, "source": "GitHub", "score": 0.007961196931761332, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Emotion Analysis", "Sentiment Analysis" ] }, { "description": "日本語T5モデル", "url": "https://github.com/sonoisa/t5-japanese", "project_name": "t5-japanese", "stargazers_count": 115, "source": "GitHub", "score": 0.007961196931761332, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "日本語WikipediaコーパスでBERTのPre-Trainedモデルを生成するためのリポジトリ", "url": "https://github.com/Kosuke-Szk/ja_text_bert", "project_name": "ja_text_bert", "stargazers_count": 115, "source": "GitHub", "score": 0.007961196931761332, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Phonology", "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Kotoba-Whisper (v2.0)", "url": "https://huggingface.co/kotoba-tech/kotoba-whisper-v2.0", "project_name": "kotoba-whisper-v2.0", "downloads": 8442, "source": "Hugging Face", "score": 0.007825864389860025, "first_commit": "2024-09-17 12:49:47", "latest_commit": "2024-09-20 01:56:01", "languages": [], "model_or_dataset": "model", "model_size": 0.756, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "luke-japanese-large luke-japanese is the Japanese version of LUKE (Language Understanding with Knowledge-based Embeddings), a pre-trained knowledge-enhanced contextualized representation of words and entities.", "url": "https://huggingface.co/studio-ousia/luke-japanese-large", "project_name": "luke-japanese-large", "downloads": 8332, "source": "Hugging Face", "score": 0.007044960563463783, "first_commit": "2022-11-07 14:25:53", "latest_commit": "2022-11-09 11:18:56", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeForMaskedLM", "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "Reazon Speech v2 DENOISED Reazon Speech v2の音声ファイルをUVRを使用してBGMやノイズ除去したこのデータセットのミラーです。 ", "url": "https://huggingface.co/datasets/litagin/reazon-speech-v2-denoised", "project_name": "reazon-speech-v2-denoised", "downloads": 8174, "source": "Hugging Face", "score": 0.005923298703730998, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Swallow-8Bは追加の日本語継続事前学習により日本語が大変流暢なLlama-3派生モデルです。", "url": "https://huggingface.co/aixsatoshi/Meta-Llama-3.1-8B-Instruct-plus-Swallow", "project_name": "Meta-Llama-3.1-8B-Instruct-plus-Swallow", 
"downloads": 8064, "source": "Hugging Face", "score": 0.005142394877334757, "first_commit": "2024-07-24 03:10:38", "latest_commit": "2024-07-24 04:03:21", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "A Japanese accent dictionary generator", "url": "https://github.com/PKSHATechnology-Research/tdmelodic", "project_name": "tdmelodic", "stargazers_count": 114, "source": "GitHub", "score": 0.005129005210143986, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese Kana Kanji conversion input method library", "url": "https://github.com/ueno/libkkc", "project_name": "libkkc", "stargazers_count": 113, "source": "GitHub", "score": 0.0022968134885266406, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "japanese-sentiment-analysis This model was trained from scratch on the chABSA dataset.", "url": "https://huggingface.co/jarvisx17/japanese-sentiment-analysis", "project_name": "japanese-sentiment-analysis", "downloads": 7650, "source": "Hugging Face", "score": 0.0022033568398070824, "first_commit": "2022-11-15 06:28:39", "latest_commit": "2024-01-20 14:45:14", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Sentiment Analysis" ] }, { "description": "十条蛍(Hotaru Jujo)の作成したLoRAを配布しています。 ", "url": "https://huggingface.co/JujoHotaru/lora", "project_name": "lora", "downloads": 7607, "source": "Hugging Face", "score": 0.001898094434943097, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Retrieval", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "transformers-ud-japanese-electra-ginza-510 (sudachitra-wordpiece, mC4 Japanese)", "url": "https://huggingface.co/megagonlabs/transformers-ud-japanese-electra-base-ginza-510", "project_name": "transformers-ud-japanese-electra-base-ginza-510", "downloads": 7542, "source": "Hugging Face", "score": 0.0014366512647998632, "first_commit": "2021-12-05 11:31:57", "latest_commit": "2021-12-05 21:12:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Llama3 Swallow - Built with Meta Llama 3", "url": "https://huggingface.co/tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1", "project_name": "Llama-3-Swallow-8B-Instruct-v0.1", "downloads": 7104, "source": "Hugging Face", "score": -0.0016727657893960822, "first_commit": "2024-06-26 04:11:25", "latest_commit": "2024-07-06 15:02:39", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Ruri: Japanese General Text Embeddings Usage First install the Sentence Transformers library: pip install -U sentence-transformers fugashi 
sentencepiece unidic-lite Then you can load this model and run inference.", "url": "https://huggingface.co/cl-nagoya/ruri-large", "project_name": "ruri-large", "downloads": 6957, "source": "Hugging Face", "score": -0.0027163372664892422, "first_commit": "2024-08-28 17:11:42", "latest_commit": "2024-09-04 08:49:10", "languages": [], "model_or_dataset": "model", "model_size": 0.337, "model_architectures": "BertModel", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "japanese-gpt-1b This repository provides a 1.3B-parameter Japanese GPT model.", "url": "https://huggingface.co/rinna/japanese-gpt-1b", "project_name": "japanese-gpt-1b", "downloads": 6907, "source": "Hugging Face", "score": -0.0030712935512148066, "first_commit": "2022-01-20 02:30:19", "latest_commit": "2024-07-20 07:52:31", "languages": [], "model_or_dataset": "model", "model_size": 1.33, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Yet Another Japanese Dependency Structure Analyzer", "url": "https://github.com/taku910/cabocha", "project_name": "cabocha", "stargazers_count": 111, "source": "GitHub", "score": -0.0033675699547080517, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing" ] }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-small-long", "project_name": "t5-small-long", "downloads": 6503, "source": "Hugging Face", "score": -0.005939340331797368, "first_commit": "2023-04-26 08:26:49", "latest_commit": "2023-05-10 10:01:29", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "A Genshin Impact vocabulary dictionary usable on Windows/macOS", "url": "https://github.com/kotofurumiya/genshin-dict", "project_name": "genshin-dict", "stargazers_count": 110, "source": "GitHub", "score": -0.006199761676325397, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese text8 corpus for word embedding.", "url": "https://github.com/Hironsan/ja.text8", "project_name": "ja.text8", "stargazers_count": 110, "source": "GitHub", "score": -0.006199761676325397, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "This model is a fine-tuned version of Luke-japanese-large-lite. ", "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-large-sentiment-analysis-wrime", "project_name": "luke-japanese-large-sentiment-analysis-wrime", "downloads": 6432, "source": "Hugging Face", "score": -0.006443378256107669, "first_commit": "2023-03-13 12:40:08", "latest_commit": "2023-05-15 12:58:08", "languages": [], "model_or_dataset": "model", "model_size": 0.41400000000000003, "model_architectures": "LukeForSequenceClassification", "multi_labels": [ "Language Models", "Emotion Analysis", "Sentiment Analysis" ] }, { "description": "ELYZA-japanese-Llama-2-7b Model Description ELYZA-japanese-Llama-2-7b is a model based on Llama 2 with additional pre-training to extend its Japanese language capabilities. ", "url":
"https://huggingface.co/elyza/ELYZA-japanese-Llama-2-7b-instruct", "project_name": "ELYZA-japanese-Llama-2-7b-instruct", "downloads": 6101, "source": "Hugging Face", "score": -0.008793188860990907, "first_commit": "2023-08-28 12:58:25", "latest_commit": "2023-08-29 03:46:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "A fast LSTM Language Model for large vocabulary language like Japanese and Chinese", "url": "https://github.com/jiali-ms/JLM", "project_name": "JLM", "stargazers_count": 109, "source": "GitHub", "score": -0.009031953397942744, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Yet another mecab wrapper for nodejs", "url": "https://github.com/golbin/node-mecab-ya", "project_name": "node-mecab-ya", "stargazers_count": 109, "source": "GitHub", "score": -0.009031953397942744, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "japanese android/cli/web dictionary based on jmdict/kanjidic — 日本語 辞典 和英辞典 漢英字典 和独辞典 和蘭辞典", "url": "https://github.com/obfusk/jiten", "project_name": "jiten", "stargazers_count": 109, "source": "GitHub", "score": -0.009031953397942744, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Vecteus-v1-gguf Local-Novel-LLM-projectさんが公開しているVecteus-v1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Vecteus-v1-gguf", "project_name": "Vecteus-v1-gguf", "downloads": 5901, "source": "Hugging Face", "score": -0.010213013999893165, "first_commit": "2024-05-01 17:49:42", "latest_commit": "2024-05-01 18:37:01", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ModernBERT-Ja-310M This repository provides Japanese ModernBERT trained by SB Intuitions.", "url": "https://huggingface.co/sbintuitions/modernbert-ja-310m", "project_name": "modernbert-ja-310m", "downloads": 5679, "source": "Hugging Face", "score": -0.011789019904074672, "first_commit": "2025-02-19 10:15:07", "latest_commit": "2025-02-20 03:24:41", "languages": [], "model_or_dataset": "model", "model_size": 0.315, "model_architectures": "ModernBertForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Danbooru2023:", "url": "https://huggingface.co/datasets/nyanko7/danbooru2023", "project_name": "danbooru2023", "downloads": 5405, "source": "Hugging Face", "score": -0.013734180344370765, "first_commit": "2024-01-07 19:51:58", "latest_commit": "2024-05-22 18:43:24", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Tagging" ] }, { "description": "Llama 3.1 Swallow - Built with Llama Llama 3.1 Swallow is a series of large language models (8B, 70B) that were built by continual pre-training on the Meta Llama 3.1 models.", "url": "https://huggingface.co/tokyotech-llm/Llama-3.1-Swallow-8B-Instruct-v0.1", "project_name": 
"Llama-3.1-Swallow-8B-Instruct-v0.1", "downloads": 5080, "source": "Hugging Face", "score": -0.016041396195086936, "first_commit": "2024-09-24 09:12:40", "latest_commit": "2024-10-09 02:39:13", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "Llama 3.1 Swallow - Built with Llama Llama 3.1 Swallow is a series of large language models (8B, 70B) that were built by continual pre-training on the Meta Llama 3.1 models.", "url": "https://huggingface.co/tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1", "project_name": "Llama-3.1-Swallow-70B-Instruct-v0.1", "downloads": 5052, "source": "Hugging Face", "score": -0.01624017171453325, "first_commit": "2024-09-29 02:36:34", "latest_commit": "2024-10-09 02:39:50", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "rinna/japanese-wav2vec2-base Overview This is a Japanese wav2vec 2.0 Base model trained by rinna Co.", "url": "https://huggingface.co/rinna/japanese-wav2vec2-base", "project_name": "japanese-wav2vec2-base", "downloads": 4932, "source": "Hugging Face", "score": -0.017092066797874606, "first_commit": "2024-03-06 01:07:56", "latest_commit": "2024-07-22 08:11:46", "languages": [], "model_or_dataset": "model", "model_size": 0.095, "model_architectures": "Wav2Vec2ForPreTraining", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Anime Whisper 🤗🎤📝", "url": "https://huggingface.co/litagin/anime-whisper", "project_name": "anime-whisper", "downloads": 4873, "source": "Hugging Face", "score": -0.017510915213850772, "first_commit": "2024-11-10 06:08:07", "latest_commit": "2024-11-24 08:56:57", "languages": [], "model_or_dataset": "model", "model_size": 0.756, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [ "Low-Resource NLP" ] }, { "description": "Trying to consolidate japanese phonetic, and in particular pitch accent resources into one list", "url": "https://github.com/olety/japanese-pitch-accent-resources", "project_name": "japanese-pitch-accent-resources", "stargazers_count": 106, "source": "GitHub", "score": -0.01752852856279478, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Phonetics" ] }, { "description": "hotchpotch/japanese-reranker-cross-encoder-small-v1 日本語で学習させた Reranker (CrossEncoder) シリーズです。 ", "url": "https://huggingface.co/hotchpotch/japanese-reranker-cross-encoder-small-v1", "project_name": "japanese-reranker-cross-encoder-small-v1", "downloads": 4856, "source": "Hugging Face", "score": -0.017631600350657466, "first_commit": "2024-03-28 04:31:45", "latest_commit": "2024-04-01 02:39:19", "languages": [], "model_or_dataset": "model", "model_size": 0.11800000000000001, "model_architectures": "XLMRobertaForSequenceClassification", "multi_labels": [ "Language Models" ] }, { "description": "Llama-3.3-70B-Instruct-gguf meta-llamaさんが公開しているLlama-3.3-70B-Instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Llama-3.3-70B-Instruct-gguf", "project_name": "Llama-3.3-70B-Instruct-gguf", "downloads": 4835, "source": "Hugging Face", "score": -0.0177806819902422, "first_commit": "2024-12-06 19:52:25", "latest_commit": "2024-12-07 
15:41:46", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ModernBERT-Ja-130M", "url": "https://huggingface.co/sbintuitions/modernbert-ja-130m", "project_name": "modernbert-ja-130m", "downloads": 4735, "source": "Hugging Face", "score": -0.01849059455969333, "first_commit": "2025-02-06 06:51:37", "latest_commit": "2025-02-27 02:35:36", "languages": [], "model_or_dataset": "model", "model_size": 0.133, "model_architectures": "ModernBertForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "wikipedia 日本語の文を、各種日本語の embeddings や faiss index へと変換したもの。 ", "url": "https://huggingface.co/datasets/hotchpotch/wikipedia-passages-jawiki-embeddings", "project_name": "wikipedia-passages-jawiki-embeddings", "downloads": 4542, "source": "Hugging Face", "score": -0.01986072581873401, "first_commit": "2023-11-14 02:28:34", "latest_commit": "2024-02-26 18:56:15", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "JaQuAD: Japanese Question Answering Dataset for Machine Reading Comprehension (2022, Skelter Labs)", "url": "https://github.com/SkelterLabsInc/JaQuAD", "project_name": "JaQuAD", "stargazers_count": 105, "source": "GitHub", "score": -0.020360720284412128, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Reasoning", "Natural Language Interfaces", "Question Answering", "Machine Reading Comprehension", "Annotation and Dataset Development" ] }, { "description": "BERT small Japanese finance This is a BERT model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/bert-small-japanese-fin", "project_name": "bert-small-japanese-fin", "downloads": 4448, "source": "Hugging Face", "score": -0.02052804363401807, "first_commit": "2021-10-04 13:15:37", "latest_commit": "2022-12-09 00:41:24", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "rinna/japanese-hubert-base Overview This is a Japanese HuBERT Base model trained by rinna Co.", "url": "https://huggingface.co/rinna/japanese-hubert-base", "project_name": "japanese-hubert-base", "downloads": 4426, "source": "Hugging Face", "score": -0.02068422439929732, "first_commit": "2023-04-28 07:39:44", "latest_commit": "2024-07-20 08:55:38", "languages": [], "model_or_dataset": "model", "model_size": 0.09440000000000001, "model_architectures": "HubertModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "rinna/japanese-hubert-large Overview This is a Japanese HuBERT Large model trained by rinna Co.", "url": "https://huggingface.co/rinna/japanese-hubert-large", "project_name": "japanese-hubert-large", "downloads": 4388, "source": "Hugging Face", "score": -0.020953991175688746, "first_commit": "2024-03-05 10:24:37", "latest_commit": "2024-07-22 08:12:21", "languages": [], "model_or_dataset": "model", "model_size": 0.315, "model_architectures": "HubertModel", "multi_labels": [ "Language Models" ] }, { "description": "Gemma 2 Baku 2B (rinna/gemma-2-baku-2b)", "url": 
"https://huggingface.co/rinna/gemma-2-baku-2b", "project_name": "gemma-2-baku-2b", "downloads": 4384, "source": "Hugging Face", "score": -0.020982387678466795, "first_commit": "2024-10-01 15:26:31", "latest_commit": "2024-10-03 13:46:54", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": "Gemma2ForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "cyberagent-DeepSeek-R1-Distill-Qwen-14B-Japanese-gguf cyberagentさんが公開しているDeepSeek-R1-Distill-Qwen-14B-Japaneseのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/cyberagent-DeepSeek-R1-Distill-Qwen-14B-Japanese-gguf", "project_name": "cyberagent-DeepSeek-R1-Distill-Qwen-14B-Japanese-gguf", "downloads": 4222, "source": "Hugging Face", "score": -0.02213244604097762, "first_commit": "2025-01-27 11:03:33", "latest_commit": "2025-01-27 14:37:13", "languages": [], "model_or_dataset": "model", "model_size": 14.8, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "「LLM-jp-3 172B beta2」利用規約 この利用規約(以下「本規約」といいます)は、大学共同利用機関法人 情報・システム研究機構 国立情報学研究所(以下「提供者」といいます)による開発の成果物として公開する大規模言語モデル「LLM-jp-3 172B beta2」(以下「本プログラム」といいます)の利用に関する条件を定めるものです。", "url": "https://huggingface.co/llm-jp/llm-jp-3-172b-beta2-instruct2", "project_name": "llm-jp-3-172b-beta2-instruct2", "downloads": 4219, "source": "Hugging Face", "score": -0.022153743418061155, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 172.0, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/OrionStarAI/Orion-14B-Chat", "project_name": "Orion-14B-Chat", "downloads": 4181, "source": "Hugging Face", "score": -0.022423510194452587, "first_commit": "2024-01-16 06:03:30", "latest_commit": "2024-04-11 10:48:51", "languages": [], "model_or_dataset": "model", "model_size": 14.5, "model_architectures": "OrionForCausalLM", "multi_labels": [ "Multilinguality", "Language Models" ] }, { "description": "hotchpotch/japanese-reranker-cross-encoder-large-v1 日本語で学習させた Reranker (CrossEncoder) シリーズです。 ", "url": "https://huggingface.co/hotchpotch/japanese-reranker-cross-encoder-large-v1", "project_name": "japanese-reranker-cross-encoder-large-v1", "downloads": 4146, "source": "Hugging Face", "score": -0.02267197959376048, "first_commit": "2024-03-28 20:53:30", "latest_commit": "2024-04-01 02:39:45", "languages": [], "model_or_dataset": "model", "model_size": 0.337, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Language Models" ] }, { "description": "AXCXEPT-phi-4-deepseek-R1K-RL-EZO-gguf AXCXEPTさんが公開しているphi-4-deepseek-R1K-RL-EZOのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/AXCXEPT-phi-4-deepseek-R1K-RL-EZO-gguf", "project_name": "AXCXEPT-phi-4-deepseek-R1K-RL-EZO-gguf", "downloads": 4108, "source": "Hugging Face", "score": -0.022941746370151908, "first_commit": "2025-01-31 00:25:02", "latest_commit": "2025-01-31 03:16:15", "languages": [], "model_or_dataset": "model", "model_size": 14.7, "model_architectures": null, "multi_labels": [] }, { "description": "t5-base-japanese-web (with Byte-fallback, 32K) Description megagonlabs/t5-base-japanese-web is a T5 (Text-to-Text Transfer Transformer) model pre-trained on Japanese web texts.", "url": 
"https://huggingface.co/megagonlabs/t5-base-japanese-web", "project_name": "t5-base-japanese-web", "downloads": 4102, "source": "Hugging Face", "score": -0.022984341124318977, "first_commit": "2021-08-24 04:41:45", "latest_commit": "2021-09-06 19:32:21", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "An example usage of JParaCrawl pre-trained Neural Machine Translation (NMT) models.", "url": "https://github.com/MorinoseiMorizo/jparacrawl-finetune", "project_name": "jparacrawl-finetune", "stargazers_count": 104, "source": "GitHub", "score": -0.023192912006029475, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "llm-jp-3-1.8b", "url": "https://huggingface.co/llm-jp/llm-jp-3-1.8b", "project_name": "llm-jp-3-1.8b", "downloads": 4013, "source": "Hugging Face", "score": -0.02361616331113048, "first_commit": "2024-09-23 12:49:46", "latest_commit": "2024-09-26 18:19:48", "languages": [], "model_or_dataset": "model", "model_size": 1.87, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Model Card for Japanese character-level DeBERTa V2 tiny Model description This is a Japanese DeBERTa V2 tiny model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/ku-nlp/deberta-v2-tiny-japanese-char-wwm", "project_name": "deberta-v2-tiny-japanese-char-wwm", "downloads": 3997, "source": "Hugging Face", "score": -0.02372974932224266, "first_commit": "2023-01-05 08:48:29", "latest_commit": "2023-03-23 07:31:19", "languages": [], "model_or_dataset": "model", "model_size": 0.0101, "model_architectures": "DebertaV2ForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "reazonspeech-nemo-v2 reazonspeech-nemo-v2 is an automatic speech recognition model trained on ReazonSpeech v2.0 corpus.", "url": "https://huggingface.co/reazon-research/reazonspeech-nemo-v2", "project_name": "reazonspeech-nemo-v2", "downloads": 3976, "source": "Hugging Face", "score": -0.0238788309618274, "first_commit": "2024-01-30 01:49:12", "latest_commit": "2024-02-14 01:32:45", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech Recognition", "Text Generation", "Speech & Audio in NLP", "Multimodality" ] }, { "description": "DeepSeek-R1-Distill-Qwen-14B-Japanese Model Description This is a Japanese finetuned model based on deepseek-ai/DeepSeek-R1-Distill-Qwen-14B. 
", "url": "https://huggingface.co/cyberagent/DeepSeek-R1-Distill-Qwen-14B-Japanese", "project_name": "DeepSeek-R1-Distill-Qwen-14B-Japanese", "downloads": 3975, "source": "Hugging Face", "score": -0.02388593008752191, "first_commit": "2025-01-27 06:45:15", "latest_commit": "2025-01-27 07:04:18", "languages": [], "model_or_dataset": "model", "model_size": 14.8, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Responsible & Trustworthy NLP", "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Ninja-v1-NSFW-gguf Local-Novel-LLM-projectさんが公開しているNinja-v1-NSFWのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Ninja-v1-NSFW-gguf", "project_name": "Ninja-v1-NSFW-gguf", "downloads": 3967, "source": "Hugging Face", "score": -0.023942723093078003, "first_commit": "2024-05-03 14:03:23", "latest_commit": "2024-05-04 13:26:52", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "Model Card for Model ID 実験モデルです /", "url": "https://huggingface.co/mmnga/Llama-3-70B-japanese-suzume-vector-v0.1", "project_name": "Llama-3-70B-japanese-suzume-vector-v0.1", "downloads": 3944, "source": "Hugging Face", "score": -0.02410600298405176, "first_commit": "2024-04-28 04:11:49", "latest_commit": "2024-04-28 07:46:32", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Model Description llava-calm2-siglip is an experimental Vision Language Model that can answer questions in Japanese about images.", "url": "https://huggingface.co/cyberagent/llava-calm2-siglip", "project_name": "llava-calm2-siglip", "downloads": 3924, "source": "Hugging Face", "score": -0.024247985497941986, "first_commit": "2024-06-12 19:35:20", "latest_commit": "2024-06-12 19:40:39", "languages": [], "model_or_dataset": "model", "model_size": 7.46, "model_architectures": "LlavaForConditionalGeneration", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "deepseek-r1-distill-qwen2.5-bakeneko-32b-gguf rinnaさんが公開しているdeepseek-r1-distill-qwen2.5-bakeneko-32bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/deepseek-r1-distill-qwen2.5-bakeneko-32b-gguf", "project_name": "deepseek-r1-distill-qwen2.5-bakeneko-32b-gguf", "downloads": 3795, "source": "Hugging Face", "score": -0.025163772712533942, "first_commit": "2025-02-18 01:30:37", "latest_commit": "2025-02-18 09:23:04", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "RakutenAI-7B-chat Model Description RakutenAI-7B is a systematic initiative that brings the latest technologies to the world of Japanese LLMs.", "url": "https://huggingface.co/Rakuten/RakutenAI-7B-chat", "project_name": "RakutenAI-7B-chat", "downloads": 3700, "source": "Hugging Face", "score": -0.025838189653512516, "first_commit": "2024-03-18 06:46:06", "latest_commit": "2025-02-10 07:25:48", "languages": [], "model_or_dataset": "model", "model_size": 7.37, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "参議院の公式ウェブサイトから会派、議員、議案、質問主意書のデータを整理しました。", "url": 
"https://github.com/smartnews-smri/house-of-councillors", "project_name": "house-of-councillors", "stargazers_count": 103, "source": "GitHub", "score": -0.02602510372764682, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "llm-jp-3-7.2b-instruct3 LLM-jp-3 is the series of large language models developed by the Research and Development Center for Large Language Models at the National Institute of Informatics.", "url": "https://huggingface.co/llm-jp/llm-jp-3-7.2b-instruct3", "project_name": "llm-jp-3-7.2b-instruct3", "downloads": 3665, "source": "Hugging Face", "score": -0.026086659052820413, "first_commit": "2025-01-31 01:29:06", "latest_commit": "2025-02-04 04:58:45", "languages": [], "model_or_dataset": "model", "model_size": 7.29, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "License:CreativeML Open RAIL-M Additional Copyright: sazyou_roukaku (TwitterID @sazyou_roukaku) as of May 31, 2023 このモデルは『CreativeML Open RAIL-M』でLicenseそのものに変更はありません。 ", "url": "https://huggingface.co/sazyou-roukaku/BracingEvoMix", "project_name": "BracingEvoMix", "downloads": 3462, "source": "Hugging Face", "score": -0.027527781568806205, "first_commit": "2023-05-31 10:29:16", "latest_commit": "2023-10-01 08:58:54", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "cyberagent-DeepSeek-R1-Distill-Qwen-32B-Japanese-gguf cyberagentさんが公開しているDeepSeek-R1-Distill-Qwen-32B-Japaneseのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/cyberagent-DeepSeek-R1-Distill-Qwen-32B-Japanese-gguf", "project_name": "cyberagent-DeepSeek-R1-Distill-Qwen-32B-Japanese-gguf", "downloads": 3359, "source": "Hugging Face", "score": -0.028258991515340868, "first_commit": "2025-01-27 11:17:24", "latest_commit": "2025-01-27 18:55:57", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "PLaMo 2 1B Model Description PLaMo 2 1B is a 1B model pre-trained on English and Japanese datasets, developed by Preferred Elements, Inc.", "url": "https://huggingface.co/pfnet/plamo-2-1b", "project_name": "plamo-2-1b", "downloads": 3355, "source": "Hugging Face", "score": -0.028287388018118913, "first_commit": "2025-02-06 20:36:47", "latest_commit": "2025-02-06 20:36:47", "languages": [], "model_or_dataset": "model", "model_size": 1.29, "model_architectures": "PlamoForCausalLM", "multi_labels": [ "Multilinguality", "Language Models" ] }, { "description": "Ruri: Japanese General Text Embeddings Usage First install the Sentence Transformers library: pip install -U sentence-transformers fugashi sentencepiece unidic-lite Then you can load this model and run inference.", "url": "https://huggingface.co/cl-nagoya/ruri-large-v2", "project_name": "ruri-large-v2", "downloads": 3293, "source": "Hugging Face", "score": -0.02872753381117861, "first_commit": "2024-12-06 11:53:17", "latest_commit": "2025-03-16 14:45:33", "languages": [], "model_or_dataset": "model", "model_size": 0.337, "model_architectures": "BertModel", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text 
Processing" ] }, { "description": "Llama 3.3 Swallow - Built with Llama Llama 3.3 Swallow is a large language model (70B) that was built by continual pre-training on the Meta Llama 3.3 model.", "url": "https://huggingface.co/tokyotech-llm/Llama-3.3-Swallow-70B-Instruct-v0.4", "project_name": "Llama-3.3-Swallow-70B-Instruct-v0.4", "downloads": 3198, "source": "Hugging Face", "score": -0.029401950752157183, "first_commit": "2025-03-03 04:33:50", "latest_commit": "2025-03-17 02:18:57", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Annotation and Dataset Development" ] }, { "description": "bert-base-japanese-v3-jsts 「大規模言語モデル入門」の第5章で紹介している(意味類似度計算)のモデルです。 ", "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-jsts", "project_name": "bert-base-japanese-v3-jsts", "downloads": 3187, "source": "Hugging Face", "score": -0.029480041134796808, "first_commit": "2023-06-11 15:27:32", "latest_commit": "2023-07-29 11:27:18", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "RakutenAI-2.0-8x7B-instruct Model Description RakutenAI-2.0-8x7B-instruct is a fine-tuned variant of RakutenAI-2.0-8x7B, designed to push the boundaries of Japanese large language models (LLMs).", "url": "https://huggingface.co/Rakuten/RakutenAI-2.0-8x7B-instruct", "project_name": "RakutenAI-2.0-8x7B-instruct", "downloads": 3184, "source": "Hugging Face", "score": -0.029501338511880342, "first_commit": "2025-01-31 05:58:31", "latest_commit": "2025-02-26 05:51:20", "languages": [], "model_or_dataset": "model", "model_size": 46.8, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "英語+日本語T5事前学習済みモデル This is a T5 (Text-to-Text Transfer Transformer) model pretrained on English and Japanese balanced corpus. 
", "url": "https://huggingface.co/sonoisa/t5-base-english-japanese", "project_name": "t5-base-english-japanese", "downloads": 3166, "source": "Hugging Face", "score": -0.029629122774381546, "first_commit": "2022-07-28 11:31:28", "latest_commit": "2022-08-27 09:07:53", "languages": [], "model_or_dataset": "model", "model_size": 0.248, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Ruri: Japanese General Text Embeddings Ruri v3 is a general-purpose Japanese text embedding model built on top of ModernBERT-Ja.", "url": "https://huggingface.co/cl-nagoya/ruri-v3-310m", "project_name": "ruri-v3-310m", "downloads": 3145, "source": "Hugging Face", "score": -0.029778204413966284, "first_commit": "2025-04-09 04:11:55", "latest_commit": "2025-04-17 09:38:05", "languages": [], "model_or_dataset": "model", "model_size": 0.315, "model_architectures": "ModernBertModel", "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "Ninja-v1-NSFW-128k-gguf Local-Novel-LLM-projectさんが公開しているNinja-v1-NSFW-128kのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Ninja-v1-NSFW-128k-gguf", "project_name": "Ninja-v1-NSFW-128k-gguf", "downloads": 3085, "source": "Hugging Face", "score": -0.03020415195563696, "first_commit": "2024-05-01 17:45:52", "latest_commit": "2024-05-04 13:25:47", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "BERTによる日本語固有表現抽出のモデル BertForTokenClassificationを用いて、日本語の文から固有表現を抽出します。 ", "url": "https://huggingface.co/jurabi/bert-ner-japanese", "project_name": "bert-ner-japanese", "downloads": 3048, "source": "Hugging Face", "score": -0.030466819606333878, "first_commit": "2022-09-26 07:46:38", "latest_commit": "2022-09-26 12:13:44", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Information Retrieval", "Named Entity Recognition", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "Llama-3.1-70B-Japanese-Instruct-2407-gguf cyberagentさんが公開しているLlama-3.1-70B-Japanese-Instruct-2407のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf", "project_name": "Llama-3.1-70B-Japanese-Instruct-2407-gguf", "downloads": 3004, "source": "Hugging Face", "score": -0.030779181136892375, "first_commit": "2024-07-26 09:05:34", "latest_commit": "2024-07-27 05:59:10", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Qwen2.5 Bakeneko 32B Instruct GGUF (rinna/qwen2.5-bakeneko-32b-instruct-gguf)", "url": "https://huggingface.co/rinna/qwen2.5-bakeneko-32b-instruct-gguf", "project_name": "qwen2.5-bakeneko-32b-instruct-gguf", "downloads": 3003, "source": "Hugging Face", "score": -0.030786280262586885, "first_commit": "2025-02-12 08:41:19", "latest_commit": "2025-02-14 04:32:24", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": null, "multi_labels": [ "Language Models", "Green & Sustainable NLP", "Semantic Text Processing" ] }, { "description": "ELYZA-tasks-100: 日本語instructionモデル評価データセット Data 
Description 本データセットはinstruction-tuningを行ったモデルの評価用データセットです。", "url": "https://huggingface.co/datasets/elyza/ELYZA-tasks-100", "project_name": "ELYZA-tasks-100", "downloads": 2984, "source": "Hugging Face", "score": -0.0309211636507826, "first_commit": "2023-08-28 09:01:44", "latest_commit": "2023-12-27 18:17:36", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-beta-7B-GGUF", "project_name": "japanese-stablelm-instruct-beta-7B-GGUF", "downloads": 2975, "source": "Hugging Face", "score": -0.030985055782033203, "first_commit": "2023-11-03 01:04:31", "latest_commit": "2023-11-03 12:54:55", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "X-GENRE classifier - multilingual text genre classifier Text classification model based on xlm-roberta-base and fine-tuned on a combination of three genre datasets: Slovene GINCO dataset (Kuzman et al.", "url": "https://huggingface.co/classla/xlm-roberta-base-multilingual-text-genre-classifier", "project_name": "xlm-roberta-base-multilingual-text-genre-classifier", "downloads": 2946, "source": "Hugging Face", "score": -0.03119093042717403, "first_commit": "2022-11-11 09:33:55", "latest_commit": "2024-08-12 10:58:23", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "XLMRobertaForSequenceClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "中文 | 한국어 | 日本語 | Русский | Deutsch | Français | Español | Português | Türkçe | Tiếng Việt | العربية Ultralytics YOLO11 is a cutting-edge, state-of-the-art (SOTA) model that builds upon the success of previous YOLO versions and introduces new features and improvements to further boost performance and flexibility.", "url": "https://huggingface.co/Ultralytics/YOLO11", "project_name": "YOLO11", "downloads": 2936, "source": "Hugging Face", "score": -0.03126192168411914, "first_commit": "2024-10-16 12:53:43", "latest_commit": "2025-01-11 19:09:02", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality" ] }, { "description": "Fugaku-LLM-13B-instruct-gguf Fugaku-LLMさんが公開しているFugaku-LLM-13B-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Fugaku-LLM-13B-instruct-gguf", "project_name": "Fugaku-LLM-13B-instruct-gguf", "downloads": 2926, "source": "Hugging Face", "score": -0.03133291294106425, "first_commit": "2024-05-10 16:43:49", "latest_commit": "2024-05-12 06:06:51", "languages": [], "model_or_dataset": "model", "model_size": 13.4, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "Japanese Word Similarity Dataset", "url": "https://github.com/tmu-nlp/JapaneseWordSimilarityDataset", "project_name": "JapaneseWordSimilarityDataset", "stargazers_count": 101, "source": "GitHub", "score": -0.03168948717088151, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Semantic Similarity", "Semantic Text Processing" ] }, { "description": "Model Card for gemma-2-2b-jpn-it-translate-gguf 
gemma-2-2b-jpn-it-translate-ggufは、日英・英日翻訳タスクに特化したSLM(Small Language Model)です。", "url": "https://huggingface.co/webbigdata/gemma-2-2b-jpn-it-translate-gguf", "project_name": "gemma-2-2b-jpn-it-translate-gguf", "downloads": 2857, "source": "Hugging Face", "score": -0.03182275261398553, "first_commit": "2024-10-07 14:59:02", "latest_commit": "2024-10-10 14:12:46", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models", "Semantic Text Processing" ] }, { "description": "Llama-3.1-8B-EZO-1.1-it-gguf HODACHIさんが公開しているLlama-3.1-8B-EZO-1.1-itのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Llama-3.1-8B-EZO-1.1-it-gguf", "project_name": "Llama-3.1-8B-EZO-1.1-it-gguf", "downloads": 2805, "source": "Hugging Face", "score": -0.03219190715010012, "first_commit": "2024-07-31 11:06:36", "latest_commit": "2024-07-31 12:47:45", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "umiyuki-Umievo-itr012-Gleipnir-7B-gguf umiyukiさんが公開しているUmievo-itr012-Gleipnir-7Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/umiyuki-Umievo-itr012-Gleipnir-7B-gguf", "project_name": "umiyuki-Umievo-itr012-Gleipnir-7B-gguf", "downloads": 2752, "source": "Hugging Face", "score": -0.03256816081190922, "first_commit": "2024-05-29 15:05:32", "latest_commit": "2024-05-29 15:53:40", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [] }, { "description": "This is a Japanese sentence-LUKE model.", "url": "https://huggingface.co/sonoisa/sentence-luke-japanese-base-lite", "project_name": "sentence-luke-japanese-base-lite", "downloads": 2745, "source": "Hugging Face", "score": -0.0326178546917708, "first_commit": "2023-03-19 14:44:42", "latest_commit": "2023-03-20 01:32:34", "languages": [], "model_or_dataset": "model", "model_size": 0.133, "model_architectures": "LukeModel", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Llama-3-ELYZA-JP-8B-GGUF Model Description Llama-3-ELYZA-JP-8B is a large language model trained by ELYZA, Inc.", "url": "https://huggingface.co/elyza/Llama-3-ELYZA-JP-8B-GGUF", "project_name": "Llama-3-ELYZA-JP-8B-GGUF", "downloads": 2737, "source": "Hugging Face", "score": -0.03267464769732689, "first_commit": "2024-06-25 07:29:22", "latest_commit": "2024-06-26 02:56:52", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "ELYZA-japanese-Llama-2-7b-fast-instruct-gguf ELYZAさんが公開しているELYZA-japanese-Llama-2-7b-fast-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-7b-fast-instruct-gguf", "project_name": "ELYZA-japanese-Llama-2-7b-fast-instruct-gguf", "downloads": 2732, "source": "Hugging Face", "score": -0.03271014332579945, "first_commit": "2023-08-29 15:31:01", "latest_commit": "2023-11-16 14:27:48", "languages": [], "model_or_dataset": "model", "model_size": 6.85, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Ruri-Reranker: Japanese General Reranker Usage Direct Usage (Sentence Transformers)", "url": 
"https://huggingface.co/cl-nagoya/ruri-reranker-large", "project_name": "ruri-reranker-large", "downloads": 2724, "source": "Hugging Face", "score": -0.03276693633135554, "first_commit": "2024-08-20 02:37:26", "latest_commit": "2024-09-04 08:50:12", "languages": [], "model_or_dataset": "model", "model_size": 0.337, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "JMTEB:", "url": "https://huggingface.co/datasets/sbintuitions/JMTEB", "project_name": "JMTEB", "downloads": 2672, "source": "Hugging Face", "score": -0.033136090867470123, "first_commit": "2024-02-22 18:15:27", "latest_commit": "2024-06-28 15:18:20", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Responsible & Trustworthy NLP", "Representation Learning", "Text Classification", "Semantic Text Processing" ] }, { "description": "lightblue-suzume-llama-3-8B-japanese-gguf lightblueさんが公開しているsuzume-llama-3-8B-japaneseのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/lightblue-suzume-llama-3-8B-japanese-gguf", "project_name": "lightblue-suzume-llama-3-8B-japanese-gguf", "downloads": 2634, "source": "Hugging Face", "score": -0.03340585764386155, "first_commit": "2024-04-23 13:30:08", "latest_commit": "2024-05-07 12:58:06", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Multilinguality", "Annotation and Dataset Development" ] }, { "description": "【告知】chilled_remix及びreversemixは2023年5月21日にVersion変更を行い、v2へ移行いたしました。", "url": "https://huggingface.co/sazyou-roukaku/chilled_remix", "project_name": "chilled_remix", "downloads": 2621, "source": "Hugging Face", "score": -0.033498146277890196, "first_commit": "2023-04-18 12:48:48", "latest_commit": "2023-06-09 23:08:31", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "To load a language pair which isn't part of the config, all you need to do is specify the language code as pairs.", "url": "https://huggingface.co/datasets/Helsinki-NLP/tatoeba", "project_name": "tatoeba", "downloads": 2614, "source": "Hugging Face", "score": -0.03354784015775178, "first_commit": "2022-01-25 16:36:30", "latest_commit": "2024-01-18 11:16:48", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-7b-hf", "project_name": "Swallow-7b-hf", "downloads": 2613, "source": "Hugging Face", "score": -0.033554939283446286, "first_commit": "2023-11-25 10:09:49", "latest_commit": "2024-06-29 08:56:17", "languages": [], "model_or_dataset": "model", "model_size": 6.83, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "bert-base-japanese-v3-unsup-simcse-jawiki 「大規模言語モデル入門」の第8章で紹介している教師なしSimCSEのモデルです。 ", "url": 
"https://huggingface.co/llm-book/bert-base-japanese-v3-unsup-simcse-jawiki", "project_name": "bert-base-japanese-v3-unsup-simcse-jawiki", "downloads": 2606, "source": "Hugging Face", "score": -0.03360463316330787, "first_commit": "2023-06-21 10:52:27", "latest_commit": "2023-07-24 07:07:44", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Kotoba-Whisper-v2.1 Kotoba-Whisper-v2.1 is a Japanese ASR model based on kotoba-tech/kotoba-whisper-v2.0, with additional postprocessing stacks integrated as pipeline.", "url": "https://huggingface.co/kotoba-tech/kotoba-whisper-v2.1", "project_name": "kotoba-whisper-v2.1", "downloads": 2602, "source": "Hugging Face", "score": -0.03363302966608591, "first_commit": "2024-09-17 14:19:45", "latest_commit": "2024-09-20 01:55:12", "languages": [], "model_or_dataset": "model", "model_size": 0.756, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "[Llama-3-EZO model card]", "url": "https://huggingface.co/AXCXEPT/Llama-3-EZO-8b-Common-it", "project_name": "Llama-3-EZO-8b-Common-it", "downloads": 2602, "source": "Hugging Face", "score": -0.03363302966608591, "first_commit": "2024-07-13 06:42:31", "latest_commit": "2024-08-23 10:52:05", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Llama-3.1-8B-Instruct-gguf meta-llamaさんが公開しているMeta-Llama-3.1-8B-Instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Llama-3.1-8B-Instruct-gguf", "project_name": "Llama-3.1-8B-Instruct-gguf", "downloads": 2483, "source": "Hugging Face", "score": -0.03447782562373276, "first_commit": "2024-07-23 16:33:06", "latest_commit": "2024-07-24 21:04:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Asynchronous japanese morphological analyser using MeCab.", "url": "https://github.com/hecomi/node-mecab-async", "project_name": "node-mecab-async", "stargazers_count": 100, "source": "GitHub", "score": -0.03452167889249886, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "Kotoba-Whisper Kotoba-Whisper is a collection of distilled Whisper models for Japanese ASR, developed through the collaboration bewteen Asahi Ushio and Kotoba Technologies.", "url": "https://huggingface.co/kotoba-tech/kotoba-whisper-v1.0", "project_name": "kotoba-whisper-v1.0", "downloads": 2472, "source": "Hugging Face", "score": -0.03455591600637238, "first_commit": "2024-04-14 08:53:48", "latest_commit": "2024-05-08 12:40:53", "languages": [], "model_or_dataset": "model", "model_size": 0.756, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "line-corporation/japanese-large-lm-1.7b-instruction-sft line-corporationさんが公開しているjapanese-large-lm-1.7b-instruction-sftのgguf変換版です。 ", "url": "https://huggingface.co/mmnga/line-corp-japanese-large-lm-1.7b-instruction-sft-gguf", "project_name": 
"line-corp-japanese-large-lm-1.7b-instruction-sft-gguf", "downloads": 2452, "source": "Hugging Face", "score": -0.03469789852026261, "first_commit": "2023-09-03 22:30:23", "latest_commit": "2024-03-24 05:54:56", "languages": [], "model_or_dataset": "model", "model_size": 1.77, "model_architectures": null, "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "line-corporation/japanese-large-lm-1.7b line-corporationさんが公開しているjapanese-large-lm-1.7bのgguf変換版です。 ", "url": "https://huggingface.co/mmnga/line-corp-japanese-large-lm-1.7b-gguf", "project_name": "line-corp-japanese-large-lm-1.7b-gguf", "downloads": 2449, "source": "Hugging Face", "score": -0.03471919589734614, "first_commit": "2023-09-03 22:35:34", "latest_commit": "2024-03-24 05:54:30", "languages": [], "model_or_dataset": "model", "model_size": 1.77, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-xl", "project_name": "t5-xl", "downloads": 2334, "source": "Hugging Face", "score": -0.03553559535221494, "first_commit": "2023-04-26 07:19:08", "latest_commit": "2023-05-10 10:01:04", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Llama 3.1 Swallow - Built with Llama Llama 3.1 Swallow is a series of large language models (8B, 70B) that were built by continual pre-training on the Meta Llama 3.1 models.", "url": "https://huggingface.co/tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.3", "project_name": "Llama-3.1-Swallow-70B-Instruct-v0.3", "downloads": 2326, "source": "Hugging Face", "score": -0.03559238835777103, "first_commit": "2024-12-25 13:21:28", "latest_commit": "2025-01-27 04:03:16", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "BERT base Japanese (character tokenization, whole word masking enabled)", "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-char-whole-word-masking", "project_name": "bert-base-japanese-char-whole-word-masking", "downloads": 2300, "source": "Hugging Face", "score": -0.03577696562582832, "first_commit": "2020-04-28 21:34:13", "latest_commit": "2024-02-22 00:58:18", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese-StableLM-Instruct-Beta-70B A cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-instruct-beta-70b is a 70B-parameter decoder-only language model based on japanese-stablelm-base-beta-70b and further fine tuned on Databricks Dolly-15k, Anthropic HH, and other public data.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-beta-70b", "project_name": "japanese-stablelm-instruct-beta-70b", "downloads": 2296, "source": "Hugging Face", "score": -0.03580536212860637, "first_commit": "2023-10-30 07:47:31", "latest_commit": "2023-12-19 06:45:10", "languages": [], "model_or_dataset": "model", 
"model_size": 69.0, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "japanese-stablelm-2-instruct-1_6b-gguf stabilityaiさんが公開しているjapanese-stablelm-2-instruct-1_6bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/japanese-stablelm-2-instruct-1_6b-gguf", "project_name": "japanese-stablelm-2-instruct-1_6b-gguf", "downloads": 2283, "source": "Hugging Face", "score": -0.035897650762635014, "first_commit": "2024-05-11 07:26:43", "latest_commit": "2024-05-11 09:56:19", "languages": [], "model_or_dataset": "model", "model_size": 1.64, "model_architectures": null, "multi_labels": [] }, { "description": "本モデルはDeepSeek-R1-Distill-Qwen-14Bを日本語で微調整したモデルです。", "url": "https://huggingface.co/dahara1/DeepSeek-R1-Distill-Qwen-14B-unsloth-gguf-japanese-imatrix", "project_name": "DeepSeek-R1-Distill-Qwen-14B-unsloth-gguf-japanese-imatrix", "downloads": 2277, "source": "Hugging Face", "score": -0.03594024551680208, "first_commit": "2025-01-23 13:47:46", "latest_commit": "2025-01-25 01:42:35", "languages": [], "model_or_dataset": "model", "model_size": 14.8, "model_architectures": null, "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Llama3 Swallow - Built with Meta Llama 3", "url": "https://huggingface.co/tokyotech-llm/Llama-3-Swallow-8B-v0.1", "project_name": "Llama-3-Swallow-8B-v0.1", "downloads": 2241, "source": "Hugging Face", "score": -0.03619581404180449, "first_commit": "2024-05-20 06:36:00", "latest_commit": "2024-07-01 06:24:48", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "OpenCALM-Small Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", "url": "https://huggingface.co/cyberagent/open-calm-small", "project_name": "open-calm-small", "downloads": 2219, "source": "Hugging Face", "score": -0.03635199480708374, "first_commit": "2023-05-15 06:40:15", "latest_commit": "2023-05-18 01:10:33", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "AIBunCho/japanese-novel-gpt-j-6b AI BunChoさんが公開しているjapanese-novel-gpt-j-6bのgguf変換版です。 ", "url": "https://huggingface.co/mmnga/aibuncho-japanese-novel-gpt-j-6b-gguf", "project_name": "aibuncho-japanese-novel-gpt-j-6b-gguf", "downloads": 2201, "source": "Hugging Face", "score": -0.03647977906958494, "first_commit": "2023-09-03 17:32:44", "latest_commit": "2023-09-11 01:10:36", "languages": [], "model_or_dataset": "model", "model_size": 6.05, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-70b-hf", "project_name": "Swallow-70b-hf", "downloads": 2090, "source": "Hugging Face", "score": -0.037267782021675695, "first_commit": "2023-11-25 02:13:04", "latest_commit": "2024-06-29 08:56:23", "languages": [], "model_or_dataset": "model", "model_size": null, 
"model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-13b-hf", "project_name": "Swallow-13b-hf", "downloads": 2062, "source": "Hugging Face", "score": -0.03746655754112201, "first_commit": "2023-11-16 15:40:49", "latest_commit": "2024-06-29 08:56:21", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "CC-MAIN-2019-39へようこそ 本データセットはCommonCrawlerと呼ばれるものから日本語のみを抽出したものです。 ", "url": "https://huggingface.co/datasets/cc-clean/CC-MAIN-2019-39", "project_name": "CC-MAIN-2019-39", "downloads": 2055, "source": "Hugging Face", "score": -0.03751625142098359, "first_commit": "2024-12-11 07:58:31", "latest_commit": "2024-12-11 12:53:42", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "FINGU-AI/FinguAI-Chat-v1 Overview The FINGU-AI/FinguAI-Chat-v1 model offers a specialized curriculum tailored to English, Korean, and Japanese speakers interested in finance, investment, and legal frameworks.", "url": "https://huggingface.co/FINGU-AI/FinguAI-Chat-v1", "project_name": "FinguAI-Chat-v1", "downloads": 2037, "source": "Hugging Face", "score": -0.037644035683484796, "first_commit": "2024-03-21 07:08:05", "latest_commit": "2024-03-22 09:36:44", "languages": [], "model_or_dataset": "model", "model_size": 0.464, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "PLaMo-13B Model Description PLaMo-13B is a LLaMA-based 13B model pre-trained on English and Japanese open datasets, developed by Preferred Networks, Inc. ", "url": "https://huggingface.co/pfnet/plamo-13b", "project_name": "plamo-13b", "downloads": 2033, "source": "Hugging Face", "score": -0.03767243218626284, "first_commit": "2023-09-25 12:47:05", "latest_commit": "2023-10-10 15:24:54", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": "PlamoForCausalLM", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "llm-jp-3-13b-instruct This repository provides large language models developed by the Research and Development Center for Large Language Models at the National Institute of Informatics.", "url": "https://huggingface.co/llm-jp/llm-jp-3-13b-instruct", "project_name": "llm-jp-3-13b-instruct", "downloads": 2027, "source": "Hugging Face", "score": -0.037715026940429906, "first_commit": "2024-09-23 13:17:09", "latest_commit": "2024-09-26 18:21:20", "languages": [], "model_or_dataset": "model", "model_size": 13.7, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "old? 
", "url": "https://huggingface.co/Lasorco/lametta_old", "project_name": "lametta_old", "downloads": 2027, "source": "Hugging Face", "score": -0.037715026940429906, "first_commit": "2023-05-21 11:16:50", "latest_commit": "2024-07-23 07:24:33", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ELYZA-japanese-Llama-2-13b Model Description ELYZA-japanese-Llama-2-13b は、 Llama 2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。", "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-13b", "project_name": "ELYZA-japanese-Llama-2-13b", "downloads": 1932, "source": "Hugging Face", "score": -0.038389443881408476, "first_commit": "2023-12-25 16:38:08", "latest_commit": "2023-12-27 01:40:43", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "aya-23-8B-gguf CohereForAIさんが公開しているaya-23-8Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/aya-23-8B-gguf", "project_name": "aya-23-8B-gguf", "downloads": 1932, "source": "Hugging Face", "score": -0.038389443881408476, "first_commit": "2024-05-26 16:32:53", "latest_commit": "2024-05-27 00:54:36", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "sbintuitions/sarashina2.2-0.5b-instruct-v0.1 Model Summary", "url": "https://huggingface.co/sbintuitions/sarashina2.2-0.5b-instruct-v0.1", "project_name": "sarashina2.2-0.5b-instruct-v0.1", "downloads": 1929, "source": "Hugging Face", "score": -0.03841074125849201, "first_commit": "2025-03-03 16:38:49", "latest_commit": "2025-03-05 07:02:01", "languages": [], "model_or_dataset": "model", "model_size": 0.793, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models", "Explainability & Interpretability in NLP" ] }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-7b-instruct-hf", "project_name": "Swallow-7b-instruct-hf", "downloads": 1925, "source": "Hugging Face", "score": -0.03843913776127006, "first_commit": "2023-12-07 02:18:36", "latest_commit": "2024-06-29 08:56:26", "languages": [], "model_or_dataset": "model", "model_size": 6.83, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "ELYZA-japanese-Llama-2-7b-fast-gguf ELYZAさんが公開しているELYZA-japanese-Llama-2-7b-fastのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-7b-fast-gguf", "project_name": "ELYZA-japanese-Llama-2-7b-fast-gguf", "downloads": 1919, "source": "Hugging Face", "score": -0.03848173251543713, "first_commit": "2023-08-29 07:23:20", "latest_commit": "2023-11-16 14:27:36", "languages": [], "model_or_dataset": "model", "model_size": 6.85, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "ELYZA-japanese-Llama-2-7b Model Description ELYZA-japanese-Llama-2-7b は、 Llama2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", "url": 
"https://huggingface.co/elyza/ELYZA-japanese-Llama-2-7b", "project_name": "ELYZA-japanese-Llama-2-7b", "downloads": 1878, "source": "Hugging Face", "score": -0.03877279666891209, "first_commit": "2023-08-28 12:38:34", "latest_commit": "2023-08-29 03:45:51", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "japanese-gpt-neox-3.6b-instruction-ppo Overview This repository provides a Japanese GPT-NeoX model of 3.6 billion parameters.", "url": "https://huggingface.co/rinna/japanese-gpt-neox-3.6b-instruction-ppo", "project_name": "japanese-gpt-neox-3.6b-instruction-ppo", "downloads": 1797, "source": "Hugging Face", "score": -0.0393478258501675, "first_commit": "2023-05-30 01:50:48", "latest_commit": "2024-07-20 07:58:49", "languages": [], "model_or_dataset": "model", "model_size": 3.76, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Dialogue Systems & Conversational Agents", "Language Models", "Semantic Text Processing" ] }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-small-short", "project_name": "t5-small-short", "downloads": 1794, "source": "Hugging Face", "score": -0.039369123227251036, "first_commit": "2023-04-25 04:37:20", "latest_commit": "2023-05-10 09:55:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "🍷 FineWeb2 Edu Japanese: High-Quality Educational Japanese", "url": "https://huggingface.co/datasets/hotchpotch/fineweb-2-edu-japanese", "project_name": "fineweb-2-edu-japanese", "downloads": 1779, "source": "Hugging Face", "score": -0.03947561011266871, "first_commit": "2025-02-19 00:52:30", "latest_commit": "2025-02-20 05:51:47", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "RoSEtta RoSEtta (RoFormer-based Sentence Encoder through Distillation) is a general Japanese text embedding model, excelling in retrieval tasks.", "url": "https://huggingface.co/pkshatech/RoSEtta-base-ja", "project_name": "RoSEtta-base-ja", "downloads": 1769, "source": "Hugging Face", "score": -0.03954660136961382, "first_commit": "2024-08-22 03:25:13", "latest_commit": "2024-09-27 05:47:42", "languages": [], "model_or_dataset": "model", "model_size": 0.19, "model_architectures": "RetrievaBertModel", "multi_labels": [ "Representation Learning", "Information Retrieval", "Semantic Text Processing" ] }, { "description": "Japanese Anime Speech Dataset V2 日本語はこちら japanese-anime-speech-v2 is an audio-text dataset designed for training automatic speech recognition models.", "url": "https://huggingface.co/datasets/joujiboi/japanese-anime-speech-v2", "project_name": "japanese-anime-speech-v2", "downloads": 1737, "source": "Hugging Face", "score": -0.03977377339183818, "first_commit": "2024-06-26 14:18:01", "latest_commit": "2024-07-24 19:06:51", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech Recognition", "Text Generation", "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset 
Development" ] }, { "description": "Please feel free to open an issue or pull request.", "url": "https://huggingface.co/datasets/shunk031/JGLUE", "project_name": "JGLUE", "downloads": 1726, "source": "Hugging Face", "score": -0.0398518637744778, "first_commit": "2023-02-25 13:33:13", "latest_commit": "2024-05-21 11:23:51", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-gamma-7B-GGUF", "project_name": "japanese-stablelm-instruct-gamma-7B-GGUF", "downloads": 1723, "source": "Hugging Face", "score": -0.03987316115156134, "first_commit": "2023-10-28 19:03:17", "latest_commit": "2023-10-28 19:07:41", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese Morphological Analyzer written in Rust", "url": "https://github.com/togatoga/kanpyo", "project_name": "kanpyo", "stargazers_count": 98, "source": "GitHub", "score": -0.04018606233573355, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "Model Card for NABLA-VL", "url": "https://huggingface.co/nablasinc/NABLA-VL", "project_name": "NABLA-VL", "downloads": 1673, "source": "Hugging Face", "score": -0.0402281174362869, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 15.1, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "bert-base-japanese-v3-marc_ja 「大規模言語モデル入門」の第5章で紹介している(感情分析)のモデルです。 ", "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-marc_ja", "project_name": "bert-base-japanese-v3-marc_ja", "downloads": 1655, "source": "Hugging Face", "score": -0.0403559016987881, "first_commit": "2023-06-01 14:29:06", "latest_commit": "2023-07-24 06:49:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Information Extraction & Text Mining", "Information Retrieval", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "Tanuki-8B-dpo-v1.0-GGUF 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8B-dpo-v1.0のGGUF量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8B-dpo-v1.0-GGUF", "project_name": "Tanuki-8B-dpo-v1.0-GGUF", "downloads": 1653, "source": "Hugging Face", "score": -0.04037009995017713, "first_commit": "2024-08-14 15:05:50", "latest_commit": "2024-08-27 18:00:44", "languages": [], "model_or_dataset": "model", "model_size": 7.51, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Maintainers Junfeng Jiang@Aizawa Lab: jiangjf (at) is.s.u-tokyo.ac.jp Jiahao Huang@Aizawa Lab: jiahao-huang (at) g.ecc.u-tokyo.ac.jp", "url": "https://huggingface.co/datasets/Coldog2333/JMedBench", "project_name": "JMedBench", "downloads": 1646, "source": "Hugging Face", "score": -0.04041979383003871, "first_commit": "2024-08-17 11:12:54", "latest_commit": "2024-09-01 12:41:40", 
"languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "ELYZA-japanese-Llama-2-7b Model Description ELYZA-japanese-Llama-2-7b は、 Llama2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-7b-fast-instruct", "project_name": "ELYZA-japanese-Llama-2-7b-fast-instruct", "downloads": 1634, "source": "Hugging Face", "score": -0.040504983338372845, "first_commit": "2023-08-28 13:36:19", "latest_commit": "2023-08-29 03:47:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Reranker-Scores 既存の日本語検索・QAデータセットについて、データセット中のクエリに付与された正・負例の関連度を多言語・日本語reranker 5種類を用いてスコア付けしたデータセットです。", "url": "https://huggingface.co/datasets/hpprc/reranker-scores", "project_name": "reranker-scores", "downloads": 1629, "source": "Hugging Face", "score": -0.0405404789668454, "first_commit": "2025-03-30 05:11:49", "latest_commit": "2025-04-18 05:35:43", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Retrieval", "Indexing" ] }, { "description": "lightblue-DeepSeek-R1-Distill-Qwen-7B-Japanese-gguf lightblueさんが公開しているDeepSeek-R1-Distill-Qwen-7B-Japaneseのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/lightblue-DeepSeek-R1-Distill-Qwen-7B-Japanese-gguf", "project_name": "lightblue-DeepSeek-R1-Distill-Qwen-7B-Japanese-gguf", "downloads": 1621, "source": "Hugging Face", "score": -0.04059727197240149, "first_commit": "2025-01-29 09:31:13", "latest_commit": "2025-01-29 10:57:18", "languages": [], "model_or_dataset": "model", "model_size": 7.62, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "ELYZA-japanese-Llama-2-7b-instruct-gguf ELYZAさんが公開しているELYZA-japanese-Llama-2-7b-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-7b-instruct-gguf", "project_name": "ELYZA-japanese-Llama-2-7b-instruct-gguf", "downloads": 1604, "source": "Hugging Face", "score": -0.04071795710920818, "first_commit": "2023-08-29 05:33:45", "latest_commit": "2023-11-16 14:27:23", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "VNTL Leaderboard", "url": "https://huggingface.co/datasets/lmg-anon/vntl-leaderboard", "project_name": "vntl-leaderboard", "downloads": 1588, "source": "Hugging Face", "score": -0.04083154312032036, "first_commit": "2024-06-08 20:33:07", "latest_commit": "2025-01-02 13:34:32", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation" ] }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-large-short", "project_name": "t5-large-short", "downloads": 1586, "source": "Hugging Face", "score": -0.04084574137170938, "first_commit": "2023-04-26 08:18:58", "latest_commit": "2023-05-10 10:00:54", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Representation Learning", "Language 
Models", "Semantic Text Processing" ] }, { "description": "読み込み方 from datasets import load_dataset dataset = load_dataset(\"YANS-official/ogiri-bokete\", split=\"train\") 概要 大喜利投稿サイトBoketeのクロールデータです。", "url": "https://huggingface.co/datasets/YANS-official/ogiri-bokete", "project_name": "ogiri-bokete", "downloads": 1585, "source": "Hugging Face", "score": -0.0408528404974039, "first_commit": "2024-07-21 09:58:15", "latest_commit": "2024-08-31 09:24:55", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Generation", "Annotation and Dataset Development" ] }, { "description": "Japanese StableLM-3B-4E1T Base Model Description This is a 3B-parameter decoder-only language model with a focus on maximizing Japanese language modeling performance and Japanese downstream task performance.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-3b-4e1t-base", "project_name": "japanese-stablelm-3b-4e1t-base", "downloads": 1565, "source": "Hugging Face", "score": -0.040994823011294125, "first_commit": "2023-10-16 06:04:58", "latest_commit": "2024-04-26 03:20:34", "languages": [], "model_or_dataset": "model", "model_size": 2.8, "model_architectures": "StableLMEpochForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Ruri-Reranker: Japanese General Reranker Usage Direct Usage (Sentence Transformers)", "url": "https://huggingface.co/cl-nagoya/ruri-reranker-base", "project_name": "ruri-reranker-base", "downloads": 1554, "source": "Hugging Face", "score": -0.04107291339393375, "first_commit": "2024-08-20 01:10:40", "latest_commit": "2024-09-04 08:50:21", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "roberta-small-japanese-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-small-japanese-luw-upos", "project_name": "roberta-small-japanese-luw-upos", "downloads": 1550, "source": "Hugging Face", "score": -0.04110130989671179, "first_commit": "2021-11-03 05:51:58", "latest_commit": "2022-09-18 19:45:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Information Retrieval", "Text Classification", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "高性能な日本語 SPLADE (Sparse Lexical and Expansion Model) モデルです。", "url": "https://huggingface.co/hotchpotch/japanese-splade-v2", "project_name": "japanese-splade-v2", "downloads": 1529, "source": "Hugging Face", "score": -0.041250391536296525, "first_commit": "2024-12-16 00:19:01", "latest_commit": "2024-12-23 20:51:48", "languages": [], "model_or_dataset": "model", "model_size": 0.136, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "本モデルは『CreativeML Open RAIL-M』の範囲でラインセンスされます。 ", "url": "https://huggingface.co/Kotajiro/yayoi_mix", "project_name": "yayoi_mix", "downloads": 1505, "source": "Hugging Face", "score": -0.0414207705529648, "first_commit": "2023-06-03 13:39:19", "latest_commit": "2025-04-12 02:48:05", "languages": [], "model_or_dataset": "model", "model_size": null, 
"model_architectures": null, "multi_labels": [] }, { "description": "OpenCALM-Medium Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", "url": "https://huggingface.co/cyberagent/open-calm-medium", "project_name": "open-calm-medium", "downloads": 1496, "source": "Hugging Face", "score": -0.0414846626842154, "first_commit": "2023-05-15 06:44:47", "latest_commit": "2023-05-18 01:10:54", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese Anime Speech Dataset 日本語はこちら japanese-anime-speech is an audio-text dataset designed for the training of automatic speech recognition models.", "url": "https://huggingface.co/datasets/joujiboi/japanese-anime-speech", "project_name": "japanese-anime-speech", "downloads": 1496, "source": "Hugging Face", "score": -0.0414846626842154, "first_commit": "2023-11-07 13:53:40", "latest_commit": "2024-06-30 10:06:34", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech Recognition", "Text Generation", "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Evaluation on MIRACL japanese These models don't train on the MIRACL training data.", "url": "https://huggingface.co/aken12/splade-japanese-v3", "project_name": "splade-japanese-v3", "downloads": 1495, "source": "Hugging Face", "score": -0.04149176180990991, "first_commit": "2024-03-29 12:35:47", "latest_commit": "2024-05-22 02:59:37", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [] }, { "description": "lightblue-suzume-llama-3-8B-multilingual-gguf lightblueさんが公開しているsuzume-llama-3-8B-multilingualのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/lightblue-suzume-llama-3-8B-multilingual-gguf", "project_name": "lightblue-suzume-llama-3-8B-multilingual-gguf", "downloads": 1495, "source": "Hugging Face", "score": -0.04149176180990991, "first_commit": "2024-05-06 16:31:55", "latest_commit": "2024-05-07 12:59:57", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "hubert-base-jtube This repo provides model weights for the hubert-base model trained on the JTubeSpeech corpus. 
", "url": "https://huggingface.co/sarulab-speech/hubert-base-jtube", "project_name": "hubert-base-jtube", "downloads": 1487, "source": "Hugging Face", "score": -0.041548554815466, "first_commit": "2024-02-02 04:15:22", "latest_commit": "2024-02-05 11:49:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "HubertModel", "multi_labels": [ "Dialogue Systems & Conversational Agents", "Language Models", "Semantic Text Processing" ] }, { "description": "Model Card for Japanese character-level DeBERTa V2 base Model description This is a Japanese DeBERTa V2 base model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/ku-nlp/deberta-v2-base-japanese-char-wwm", "project_name": "deberta-v2-base-japanese-char-wwm", "downloads": 1453, "source": "Hugging Face", "score": -0.04178992508907939, "first_commit": "2023-01-18 13:55:30", "latest_commit": "2023-03-26 03:32:27", "languages": [], "model_or_dataset": "model", "model_size": 0.122, "model_architectures": "DebertaV2ForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "RakutenAI-7B Model Description RakutenAI-7B is a systematic initiative that brings the latest technologies to the world of Japanese LLMs.", "url": "https://huggingface.co/Rakuten/RakutenAI-7B", "project_name": "RakutenAI-7B", "downloads": 1432, "source": "Hugging Face", "score": -0.04193900672866412, "first_commit": "2024-03-18 06:45:28", "latest_commit": "2025-02-10 07:23:08", "languages": [], "model_or_dataset": "model", "model_size": 7.37, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "Japanese to emotions I fine-tuned LINE DistillBERT as the base model using WRIME Ver2 as the teacher data.", "url": "https://huggingface.co/koshin2001/Japanese-to-emotions", "project_name": "Japanese-to-emotions", "downloads": 1430, "source": "Hugging Face", "score": -0.041953204980053144, "first_commit": "2024-09-09 13:28:59", "latest_commit": "2024-09-11 01:49:55", "languages": [], "model_or_dataset": "model", "model_size": 0.06870000000000001, "model_architectures": "DistilBertForSequenceClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "gpt-neox-japanese-2.7b", "url": "https://huggingface.co/abeja/gpt-neox-japanese-2.7b", "project_name": "gpt-neox-japanese-2.7b", "downloads": 1419, "source": "Hugging Face", "score": -0.04203129536269277, "first_commit": "2022-08-29 02:15:44", "latest_commit": "2023-04-10 05:12:30", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXJapaneseForCausalLM", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "We provide an Amazon product reviews dataset for multilingual text classification.", "url": "https://huggingface.co/datasets/defunct-datasets/amazon_reviews_multi", "project_name": "amazon_reviews_multi", "downloads": 1414, "source": "Hugging Face", "score": -0.04206679099116533, "first_commit": "2022-01-25 16:34:54", "latest_commit": "2023-11-02 14:52:21", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Information Extraction & Text Mining", "Annotation and 
Dataset Development" ] }, { "description": "Mistral-7B-Instruct-v0.3-gguf A gguf-format conversion of Mistral-7B-Instruct-v0.3 published by mistralai.", "url": "https://huggingface.co/mmnga/Mistral-7B-Instruct-v0.3-gguf", "project_name": "Mistral-7B-Instruct-v0.3-gguf", "downloads": 1413, "source": "Hugging Face", "score": -0.04207389011685984, "first_commit": "2024-05-23 14:44:25", "latest_commit": "2024-05-23 15:58:46", "languages": [], "model_or_dataset": "model", "model_size": 7.25, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "llm-jp-3-8x1.8b-instruct3-gguf A gguf-format conversion of llm-jp-3-8x1.8b-instruct3 published by llm-jp.", "url": "https://huggingface.co/mmnga/llm-jp-3-8x1.8b-instruct3-gguf", "project_name": "llm-jp-3-8x1.8b-instruct3-gguf", "downloads": 1402, "source": "Hugging Face", "score": -0.04215198049949946, "first_commit": "2025-03-27 13:25:59", "latest_commit": "2025-03-27 15:58:05", "languages": [], "model_or_dataset": "model", "model_size": 9.27, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "llm-jp-3-1.8b-instruct3 LLM-jp-3 is the series of large language models developed by the Research and Development Center for Large Language Models at the National Institute of Informatics.", "url": "https://huggingface.co/llm-jp/llm-jp-3-1.8b-instruct3", "project_name": "llm-jp-3-1.8b-instruct3", "downloads": 1395, "source": "Hugging Face", "score": -0.042201674379361044, "first_commit": "2025-01-31 01:21:44", "latest_commit": "2025-02-04 04:57:48", "languages": [], "model_or_dataset": "model", "model_size": 1.87, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "japanese-large-lm-3.6b-instruction-sft", "url": "https://huggingface.co/line-corporation/japanese-large-lm-3.6b-instruction-sft", "project_name": "japanese-large-lm-3.6b-instruction-sft", "downloads": 1373, "source": "Hugging Face", "score": -0.04235785514464029, "first_commit": "2023-08-14 17:18:09", "latest_commit": "2023-08-24 10:08:28", "languages": [], "model_or_dataset": "model", "model_size": 3.68, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "llm-jp-modernbert-base This model is based on the modernBERT-base architecture with llm-jp-tokenizer.", "url": "https://huggingface.co/llm-jp/llm-jp-modernbert-base", "project_name": "llm-jp-modernbert-base", "downloads": 1371, "source": "Hugging Face", "score": -0.04237205339602931, "first_commit": "2025-03-16 14:13:55", "latest_commit": "2025-04-23 02:58:45", "languages": [], "model_or_dataset": "model", "model_size": 0.187, "model_architectures": "ModernBertForMaskedLM", "multi_labels": [ "Multilinguality", "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "Llama3 Swallow - Built with Meta Llama 3", "url": "https://huggingface.co/tokyotech-llm/Llama-3-Swallow-70B-Instruct-v0.1", "project_name": "Llama-3-Swallow-70B-Instruct-v0.1", "downloads": 1366, "source": "Hugging Face", "score": -0.04240754902450187, "first_commit": "2024-06-28 16:17:32", "latest_commit": "2024-07-19 08:08:59", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, {
"description": "ELYZA-japanese-CodeLlama-7b-instruct-gguf ELYZAさんが公開しているELYZA-japanese-CodeLlama-7b-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ELYZA-japanese-CodeLlama-7b-instruct-gguf", "project_name": "ELYZA-japanese-CodeLlama-7b-instruct-gguf", "downloads": 1344, "source": "Hugging Face", "score": -0.04256372978978112, "first_commit": "2023-11-15 09:48:32", "latest_commit": "2023-11-16 14:28:24", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-beta-70B-GGUF", "project_name": "japanese-stablelm-instruct-beta-70B-GGUF", "downloads": 1333, "source": "Hugging Face", "score": -0.04264182017242074, "first_commit": "2023-11-02 15:45:24", "latest_commit": "2023-11-02 18:22:05", "languages": [], "model_or_dataset": "model", "model_size": 69.0, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "BERT large Japanese (unidic-lite with whole word masking, jawiki-20200831)", "url": "https://huggingface.co/tohoku-nlp/bert-large-japanese", "project_name": "bert-large-japanese", "downloads": 1323, "source": "Hugging Face", "score": -0.04271281142936585, "first_commit": "2021-03-05 06:17:13", "latest_commit": "2021-09-23 15:45:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Reflection-Llama-3.1-70B-gguf mattshumerさんが公開しているReflection-Llama-3.1-70Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Reflection-Llama-3.1-70B-gguf", "project_name": "Reflection-Llama-3.1-70B-gguf", "downloads": 1323, "source": "Hugging Face", "score": -0.04271281142936585, "first_commit": "2024-09-06 17:18:27", "latest_commit": "2024-09-07 04:00:27", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "llm-jp-3-13b-instruct3 LLM-jp-3 is the series of large language models developed by the Research and Development Center for Large Language Models at the National Institute of Informatics.", "url": "https://huggingface.co/llm-jp/llm-jp-3-13b-instruct3", "project_name": "llm-jp-3-13b-instruct3", "downloads": 1321, "source": "Hugging Face", "score": -0.04272700968075488, "first_commit": "2025-01-27 07:45:16", "latest_commit": "2025-02-04 04:59:09", "languages": [], "model_or_dataset": "model", "model_size": 13.7, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "modernbert-base-japanese-wikipedia Model Description", "url": "https://huggingface.co/KoichiYasuoka/modernbert-base-japanese-wikipedia", "project_name": "modernbert-base-japanese-wikipedia", "downloads": 1303, "source": "Hugging Face", "score": -0.04285479394325608, "first_commit": "2025-01-11 22:39:10", "latest_commit": "2025-02-05 16:04:52", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ModernBertForMaskedLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { 
"description": "Swallow-MX-8x7b-NVE-v0.1 Our Swallow-MX-8x7b-NVE-v0.1 model has undergone continuous pre-training from the Mixtral-8x7B-Instruct-v0.1, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-MX-8x7b-NVE-v0.1", "project_name": "Swallow-MX-8x7b-NVE-v0.1", "downloads": 1287, "source": "Hugging Face", "score": -0.04296837995436826, "first_commit": "2024-02-22 04:44:42", "latest_commit": "2024-05-03 18:51:12", "languages": [], "model_or_dataset": "model", "model_size": 46.7, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "llm-lora-classification", "url": "https://github.com/hppRC/llm-lora-classification", "project_name": "llm-lora-classification", "stargazers_count": 97, "source": "GitHub", "score": -0.043018254057350894, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Information Retrieval", "Text Classification" ] }, { "description": "Japanese Company Lexicon (JCLdic)", "url": "https://github.com/chakki-works/Japanese-Company-Lexicon", "project_name": "Japanese-Company-Lexicon", "stargazers_count": 97, "source": "GitHub", "score": -0.043018254057350894, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "pfnet-nekomata-14b-pfn-qfin-inst-merge-gguf pfnetさんが公開しているnekomata-14b-pfn-qfin-inst-mergeのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/pfnet-nekomata-14b-pfn-qfin-inst-merge-gguf", "project_name": "pfnet-nekomata-14b-pfn-qfin-inst-merge-gguf", "downloads": 1273, "source": "Hugging Face", "score": -0.04306776771409142, "first_commit": "2024-04-23 14:53:08", "latest_commit": "2024-04-24 14:39:32", "languages": [], "model_or_dataset": "model", "model_size": 14.2, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "llm-jp-3-8x13b-instruct3-gguf llm-jpさんが公開しているllm-jp-3-8x13b-instruct3のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/llm-jp-3-8x13b-instruct3-gguf", "project_name": "llm-jp-3-8x13b-instruct3-gguf", "downloads": 1247, "source": "Hugging Face", "score": -0.043252344982148715, "first_commit": "2025-03-27 14:10:56", "latest_commit": "2025-03-28 06:09:14", "languages": [], "model_or_dataset": "model", "model_size": 73.2, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "日本語ウェブコーパス2010 こちらのデータをhuggingfaceにアップロードしたものです。 2009 年度における著作権法の改正(平成21年通常国会 著作権法改正等について | 文化庁)に基づき,情報解析研究への利用に限って利用可能です。 形態素解析を用いて、自動で句点をつけました。 変換コード 変換スクリプト 形態素解析など", "url": "https://huggingface.co/datasets/hatakeyama-llm-team/japanese2010", "project_name": "japanese2010", "downloads": 1242, "source": "Hugging Face", "score": -0.04328784061062127, "first_commit": "2024-03-20 00:39:02", "latest_commit": "2024-03-21 00:26:06", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese Parler-TTS Mini このリポジトリは、parler-tts/parler-tts-mini-v1を基に、日本語でのテキスト読み上げを可能にするよう再学習したモデルを公開しています。", "url": "https://huggingface.co/2121-8/japanese-parler-tts-mini", "project_name": "japanese-parler-tts-mini", "downloads": 
1229, "source": "Hugging Face", "score": -0.043380129244649915, "first_commit": "2024-12-02 05:51:38", "latest_commit": "2024-12-05 08:13:18", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ParlerTTSForConditionalGeneration", "multi_labels": [] }, { "description": "Llama 3.1 Swallow - Built with Llama Llama 3.1 Swallow is a series of large language models (8B, 70B) that were built by continual pre-training on the Meta Llama 3.1 models.", "url": "https://huggingface.co/tokyotech-llm/Llama-3.1-Swallow-8B-v0.2", "project_name": "Llama-3.1-Swallow-8B-v0.2", "downloads": 1215, "source": "Hugging Face", "score": -0.043479517004373074, "first_commit": "2024-10-28 08:49:50", "latest_commit": "2025-01-31 04:03:37", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "The Cauldron is a massive collection of 50 vision-language datasets (training sets only) that were used for the fine-tuning of the vision-language model Idefics2.", "url": "https://huggingface.co/datasets/turing-motors/Cauldron-JA", "project_name": "Cauldron-JA", "downloads": 1209, "source": "Hugging Face", "score": -0.04352211175854014, "first_commit": "2024-08-05 02:20:03", "latest_commit": "2024-08-20 02:21:28", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Visual Data in NLP", "Text Generation", "Machine Translation", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "ModernBERT-Ja-30M This repository provides Japanese ModernBERT trained by SB Intuitions.", "url": "https://huggingface.co/sbintuitions/modernbert-ja-30m", "project_name": "modernbert-ja-30m", "downloads": 1207, "source": "Hugging Face", "score": -0.04353631000992916, "first_commit": "2025-02-19 10:27:20", "latest_commit": "2025-02-20 02:58:25", "languages": [], "model_or_dataset": "model", "model_size": 0.0369, "model_architectures": "ModernBertForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "JMMMU:", "url": "https://huggingface.co/datasets/JMMMU/JMMMU", "project_name": "JMMMU", "downloads": 1206, "source": "Hugging Face", "score": -0.04354340913562368, "first_commit": "2024-10-01 02:59:51", "latest_commit": "2024-10-05 18:27:15", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multimodality" ] }, { "description": "ELYZA-japanese-Llama-2-13b-fast-instruct-gguf ELYZAさんが公開しているELYZA-japanese-Llama-2-13b-fast-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-13b-fast-instruct-gguf", "project_name": "ELYZA-japanese-Llama-2-13b-fast-instruct-gguf", "downloads": 1202, "source": "Hugging Face", "score": -0.04357180563840172, "first_commit": "2023-12-27 09:46:04", "latest_commit": "2023-12-27 11:39:18", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "DeepSeek-R1-Distill-Qwen-32B-Japanese Model Description This is a Japanese finetuned model based on deepseek-ai/DeepSeek-R1-Distill-Qwen-32B. 
", "url": "https://huggingface.co/cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese", "project_name": "DeepSeek-R1-Distill-Qwen-32B-Japanese", "downloads": 1172, "source": "Hugging Face", "score": -0.04378477940923706, "first_commit": "2025-01-27 06:53:14", "latest_commit": "2025-01-27 07:08:49", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Responsible & Trustworthy NLP", "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Mistral-Nemo-Japanese-Instruct-2408 Model Description", "url": "https://huggingface.co/cyberagent/Mistral-Nemo-Japanese-Instruct-2408", "project_name": "Mistral-Nemo-Japanese-Instruct-2408", "downloads": 1170, "source": "Hugging Face", "score": -0.043798977660626084, "first_commit": "2024-08-30 03:57:43", "latest_commit": "2024-08-30 04:03:41", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "DanbotNL 2408 260M DanbotNL is translator that tranaslates from natural languages into Danbooru tags.", "url": "https://huggingface.co/dartags/DanbotNL-2408-260M", "project_name": "DanbotNL-2408-260M", "downloads": 1167, "source": "Hugging Face", "score": -0.04382027503770961, "first_commit": "2025-03-23 15:44:00", "latest_commit": "2025-03-24 13:58:14", "languages": [], "model_or_dataset": "model", "model_size": 0.262, "model_architectures": "DanbotNLForConditionalGeneration", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation" ] }, { "description": "からまる Llama-3-Karamaru-v1 Karamaru is a conversational AI model developed by Sakana AI that responds in the style of Edo-period Japanese.", "url": "https://huggingface.co/SakanaAI/Llama-3-Karamaru-v1", "project_name": "Llama-3-Karamaru-v1", "downloads": 1165, "source": "Hugging Face", "score": -0.04383447328909864, "first_commit": "2025-03-31 06:44:31", "latest_commit": "2025-04-01 00:57:24", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "relaion2B-en-research-safe-japanese-translation This dataset is the Japanese translation of the English subset of ReLAION-5B (laion/relaion2B-en-research-safe),", "url": "https://huggingface.co/datasets/llm-jp/relaion2B-en-research-safe-japanese-translation", "project_name": "relaion2B-en-research-safe-japanese-translation", "downloads": 1154, "source": "Hugging Face", "score": -0.043912563671738264, "first_commit": "2024-10-05 10:56:50", "latest_commit": "2025-02-14 13:40:53", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Responsible & Trustworthy NLP", "Machine Translation", "Language Models", "Semantic Text Processing", "Low-Resource NLP", "Annotation and Dataset Development" ] }, { "description": "Japanese-StableLM-Base-Alpha-7B \"A parrot able to speak Japanese, ukiyoe, edo period\" — Stable Diffusion XL Model Description japanese-stablelm-base-alpha-7b is a 7B-parameter decoder-only language model pre-trained on a diverse collection of Japanese and English datasets which focus on maximizing Japanese language modeling performance and Japanese downstream task 
performance.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-base-alpha-7b", "project_name": "japanese-stablelm-base-alpha-7b", "downloads": 1134, "source": "Hugging Face", "score": -0.044054546185628485, "first_commit": "2023-08-09 14:30:09", "latest_commit": "2023-08-22 09:36:29", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "JapaneseStableLMAlphaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "OpenCALM-Large Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", "url": "https://huggingface.co/cyberagent/open-calm-large", "project_name": "open-calm-large", "downloads": 1129, "source": "Hugging Face", "score": -0.04409004181410105, "first_commit": "2023-05-15 06:50:24", "latest_commit": "2023-05-18 01:11:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "hotchpotch/japanese-bge-reranker-v2-m3-v1 A series of rerankers (CrossEncoders) trained on Japanese.", "url": "https://huggingface.co/hotchpotch/japanese-bge-reranker-v2-m3-v1", "project_name": "japanese-bge-reranker-v2-m3-v1", "downloads": 1124, "source": "Hugging Face", "score": -0.0441255374425736, "first_commit": "2024-03-28 20:45:16", "latest_commit": "2024-04-01 02:40:22", "languages": [], "model_or_dataset": "model", "model_size": 0.5680000000000001, "model_architectures": "XLMRobertaForSequenceClassification", "multi_labels": [] }, { "description": "r1-1776-distill-llama-70b-gguf A gguf-format conversion of r1-1776-distill-llama-70b published by perplexity-ai.", "url": "https://huggingface.co/mmnga/r1-1776-distill-llama-70b-gguf", "project_name": "r1-1776-distill-llama-70b-gguf", "downloads": 1117, "source": "Hugging Face", "score": -0.04417523132243518, "first_commit": "2025-02-22 03:53:11", "latest_commit": "2025-02-23 08:15:52", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "OpenCALM-7B Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", "url": "https://huggingface.co/cyberagent/open-calm-7b", "project_name": "open-calm-7b", "downloads": 1115, "source": "Hugging Face", "score": -0.044189429573824206, "first_commit": "2023-05-15 07:53:34", "latest_commit": "2023-05-18 01:12:08", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "DataPilot-ArrowPro-7B-KUJIRA-gguf A gguf-format conversion of ArrowPro-7B-KUJIRA published by DataPilot.", "url": "https://huggingface.co/mmnga/DataPilot-ArrowPro-7B-KUJIRA-gguf", "project_name": "DataPilot-ArrowPro-7B-KUJIRA-gguf", "downloads": 1111, "source": "Hugging Face", "score": -0.04421782607660225, "first_commit": "2024-05-09 13:21:27", "latest_commit": "2024-05-11 07:24:16", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Elite Voice Project
This is an unofficial project aimed at turning the voice of hololive VTuber Sakura Miko (さくらみこ) into a dataset that can be used for speech recognition and other applications.", "url": "https://huggingface.co/datasets/Elite35P-Server/EliteVoiceProject", "project_name": "EliteVoiceProject", "downloads": 1107, "source": "Hugging Face", "score": -0.044246222579380295, "first_commit": "2022-11-30 16:10:15", "latest_commit": "2023-01-15 04:28:16", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Generation", "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "tokyotech-llm-Swallow-7b-instruct-v0.1-gguf A gguf-format conversion of Swallow-7b-instruct-v0.1 published by tokyotech-llm.", "url": "https://huggingface.co/mmnga/tokyotech-llm-Swallow-7b-instruct-v0.1-gguf", "project_name": "tokyotech-llm-Swallow-7b-instruct-v0.1-gguf", "downloads": 1101, "source": "Hugging Face", "score": -0.04428881733354736, "first_commit": "2024-05-03 04:09:27", "latest_commit": "2024-05-03 04:53:43", "languages": [], "model_or_dataset": "model", "model_size": 6.83, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-base-beta-70B-GGUF", "project_name": "japanese-stablelm-base-beta-70B-GGUF", "downloads": 1097, "source": "Hugging Face", "score": -0.044317213836325406, "first_commit": "2023-11-06 11:33:47", "latest_commit": "2023-11-06 12:14:36", "languages": [], "model_or_dataset": "model", "model_size": 69.0, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "ELYZA-japanese-Llama-2-13b Model Description ELYZA-japanese-Llama-2-13b is a model based on Llama 2 with additional pre-training to extend its Japanese language capabilities.", "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-13b-instruct", "project_name": "ELYZA-japanese-Llama-2-13b-instruct", "downloads": 1073, "source": "Hugging Face", "score": -0.044487592852993675, "first_commit": "2023-12-25 16:10:32", "latest_commit": "2023-12-27 01:41:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Llama 3 Youko 8B (rinna/llama-3-youko-8b)", "url": "https://huggingface.co/rinna/llama-3-youko-8b", "project_name": "llama-3-youko-8b", "downloads": 1072, "source": "Hugging Face", "score": -0.04449469197868819, "first_commit": "2024-05-01 07:53:46", "latest_commit": "2024-07-25 05:14:42", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "japanese-large-lm-3.6b", "url": "https://huggingface.co/line-corporation/japanese-large-lm-3.6b", "project_name": "japanese-large-lm-3.6b", "downloads": 1068, "source": "Hugging Face", "score": -0.04452308848146623, "first_commit": "2023-07-21 00:48:05", "latest_commit": "2023-08-17 01:06:17", "languages": [], "model_or_dataset": "model", "model_size": 3.68, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "mathstral-7B-v0.1-gguf A gguf-format conversion of mathstral-7B-v0.1 published by mistralai.", "url":
"https://huggingface.co/mmnga/mathstral-7B-v0.1-gguf", "project_name": "mathstral-7B-v0.1-gguf", "downloads": 1067, "source": "Hugging Face", "score": -0.044530187607160744, "first_commit": "2024-07-17 17:49:56", "latest_commit": "2024-07-17 18:54:27", "languages": [], "model_or_dataset": "model", "model_size": 7.25, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Ruri: Japanese General Text Embeddings Ruri v3 is a general-purpose Japanese text embedding model built on top of ModernBERT-Ja.", "url": "https://huggingface.co/cl-nagoya/ruri-v3-30m", "project_name": "ruri-v3-30m", "downloads": 1065, "source": "Hugging Face", "score": -0.044544385858549765, "first_commit": "2025-04-07 06:58:25", "latest_commit": "2025-04-17 09:37:12", "languages": [], "model_or_dataset": "model", "model_size": 0.0367, "model_architectures": "ModernBertModel", "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "(简体中文|English|日本語) Introduction github repo : https://github.com/FunAudioLLM/SenseVoice SenseVoice is a speech foundation model with multiple speech understanding capabilities, including automatic speech recognition (ASR), spoken language identification (LID), speech emotion recognition (SER), and audio event detection (AED).", "url": "https://huggingface.co/FunAudioLLM/SenseVoiceSmall", "project_name": "SenseVoiceSmall", "downloads": 1062, "source": "Hugging Face", "score": -0.0445656832356333, "first_commit": "2024-07-03 03:56:49", "latest_commit": "2024-07-31 05:47:48", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "rinna/youri-7b Overview We conduct continual pre-training of llama2-7b on 40B tokens from a mixture of Japanese and English datasets.", "url": "https://huggingface.co/rinna/youri-7b", "project_name": "youri-7b", "downloads": 1057, "source": "Hugging Face", "score": -0.044601178864105855, "first_commit": "2023-10-30 15:12:17", "latest_commit": "2024-07-22 08:01:22", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "tokyotech-llm-Swallow-13b-instruct-v0.1-gguf tokyotech-llmさんが公開しているSwallow-13b-instruct-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/tokyotech-llm-Swallow-13b-instruct-v0.1-gguf", "project_name": "tokyotech-llm-Swallow-13b-instruct-v0.1-gguf", "downloads": 1057, "source": "Hugging Face", "score": -0.044601178864105855, "first_commit": "2024-05-02 14:18:27", "latest_commit": "2024-05-03 04:36:24", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "tokyotech-llm-Llama-3.1-Swallow-8B-Instruct-v0.1-gguf tokyotech-llmさんが公開しているLlama-3.1-Swallow-8B-Instruct-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/tokyotech-llm-Llama-3.1-Swallow-8B-Instruct-v0.1-gguf", "project_name": "tokyotech-llm-Llama-3.1-Swallow-8B-Instruct-v0.1-gguf", "downloads": 1040, "source": "Hugging Face", "score": -0.04472186400091255, "first_commit": "2024-10-09 13:38:40", "latest_commit": "2024-10-09 15:21:38", "languages": [], "model_or_dataset": "model", 
"model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "tokyotech-llm-Swallow-MS-7b-instruct-v0.1-gguf tokyotech-llmさんが公開しているSwallow-MS-7b-instruct-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/tokyotech-llm-Swallow-MS-7b-instruct-v0.1-gguf", "project_name": "tokyotech-llm-Swallow-MS-7b-instruct-v0.1-gguf", "downloads": 1038, "source": "Hugging Face", "score": -0.04473606225230157, "first_commit": "2024-05-02 13:37:22", "latest_commit": "2024-05-03 04:35:34", "languages": [], "model_or_dataset": "model", "model_size": 7.33, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "This is a Japanese translated version of HumanEval, an evaluation harness for the HumanEval problem solving dataset described in the paper \"Evaluating Large Language Models Trained on Code\".", "url": "https://huggingface.co/datasets/kogi-jwu/jhumaneval", "project_name": "jhumaneval", "downloads": 1037, "source": "Hugging Face", "score": -0.04474316137799608, "first_commit": "2023-10-21 08:20:14", "latest_commit": "2024-01-10 21:52:35", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models", "Annotation and Dataset Development" ] }, { "description": "※llama.cpp Releases b3428(7/21)", "url": "https://huggingface.co/MCZK/EZO-Common-9B-gemma-2-it-GGUF", "project_name": "EZO-Common-9B-gemma-2-it-GGUF", "downloads": 1027, "source": "Hugging Face", "score": -0.04481415263494119, "first_commit": "2024-07-10 11:12:59", "latest_commit": "2024-07-21 11:26:08", "languages": [], "model_or_dataset": "model", "model_size": 9.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "tokyotech-llm-Llama-3.1-Swallow-8B-Instruct-v0.2-gguf tokyotech-llmさんが公開しているLlama-3.1-Swallow-8B-Instruct-v0.2のggufフォーマット変換版です。", "url": "https://huggingface.co/mmnga/tokyotech-llm-Llama-3.1-Swallow-8B-Instruct-v0.2-gguf", "project_name": "tokyotech-llm-Llama-3.1-Swallow-8B-Instruct-v0.2-gguf", "downloads": 1020, "source": "Hugging Face", "score": -0.044863846514802776, "first_commit": "2024-11-11 12:24:59", "latest_commit": "2024-11-11 13:54:52", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Llama-3-ELYZA-JP-8B-gguf elyzaさんが公開しているLlama-3-ELYZA-JP-8Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Llama-3-ELYZA-JP-8B-gguf", "project_name": "Llama-3-ELYZA-JP-8B-gguf", "downloads": 1008, "source": "Hugging Face", "score": -0.044949036023136914, "first_commit": "2024-06-26 16:36:04", "latest_commit": "2024-06-26 17:55:35", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "bilingual-gpt-neox-4b Overview This repository provides an English-Japanese bilingual GPT-NeoX model of 3.8 billion parameters.", "url": "https://huggingface.co/rinna/bilingual-gpt-neox-4b", "project_name": "bilingual-gpt-neox-4b", "downloads": 1004, "source": "Hugging Face", "score": -0.044977432525914955, "first_commit": "2023-07-31 02:34:03", "latest_commit": "2024-07-20 08:02:07", "languages": [], 
"model_or_dataset": "model", "model_size": 3.95, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Multilinguality", "Language Models", "Semantic Text Processing" ] }, { "description": "stockmark-gpt-neox-japanese-1.4b-gguf stockmarkさんが公開しているgpt-neox-japanese-1.4bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/stockmark-gpt-neox-japanese-1.4b-gguf", "project_name": "stockmark-gpt-neox-japanese-1.4b-gguf", "downloads": 995, "source": "Hugging Face", "score": -0.04504132465716556, "first_commit": "2023-08-22 12:45:18", "latest_commit": "2023-09-08 22:00:37", "languages": [], "model_or_dataset": "model", "model_size": 1.41, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "aya-23-35B-gguf CohereForAIさんが公開しているaya-23-35Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/aya-23-35B-gguf", "project_name": "aya-23-35B-gguf", "downloads": 993, "source": "Hugging Face", "score": -0.04505552290855458, "first_commit": "2024-05-26 16:32:27", "latest_commit": "2024-05-27 00:47:56", "languages": [], "model_or_dataset": "model", "model_size": 35.0, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-13b-instruct-hf", "project_name": "Swallow-13b-instruct-hf", "downloads": 985, "source": "Hugging Face", "score": -0.04511231591411067, "first_commit": "2023-12-07 03:10:55", "latest_commit": "2024-06-29 08:56:29", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "tokyotech-llm-Llama-3.1-Swallow-70B-Instruct-v0.3-gguf tokyotech-llmさんが公開しているLlama-3.1-Swallow-70B-Instruct-v0.3のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/tokyotech-llm-Llama-3.1-Swallow-70B-Instruct-v0.3-gguf", "project_name": "tokyotech-llm-Llama-3.1-Swallow-70B-Instruct-v0.3-gguf", "downloads": 982, "source": "Hugging Face", "score": -0.045133613291194204, "first_commit": "2024-12-30 07:17:56", "latest_commit": "2024-12-30 19:31:26", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "cyberagent-open-calm-7b-gguf cyberagentさんが公開しているopen-calm-7bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/cyberagent-open-calm-7b-gguf", "project_name": "cyberagent-open-calm-7b-gguf", "downloads": 978, "source": "Hugging Face", "score": -0.04516200979397225, "first_commit": "2023-08-21 09:55:24", "latest_commit": "2023-09-08 03:08:46", "languages": [], "model_or_dataset": "model", "model_size": 6.87, "model_architectures": null, "multi_labels": [ "Semantic Text Processing" ] }, { "description": "gpt2-large-japanese This repository provides a large sized Japanese GPT-2 model.", "url": "https://huggingface.co/abeja/gpt2-large-japanese", "project_name": "gpt2-large-japanese", "downloads": 977, "source": "Hugging Face", "score": -0.04516910891966676, "first_commit": "2022-08-29 05:17:36", "latest_commit": "2022-08-29 16:10:11", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", 
"multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "This dataset contains a diverse set of natural Japanese speech, collected from terrestrial television streams.", "url": "https://huggingface.co/datasets/reazon-research/reazonspeech", "project_name": "reazonspeech", "downloads": 977, "source": "Hugging Face", "score": -0.04516910891966676, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "qwen2.5-bakeneko-32b-instruct-v2-gguf rinnaさんが公開しているqwen2.5-bakeneko-32b-instruct-v2のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/qwen2.5-bakeneko-32b-instruct-v2-gguf", "project_name": "qwen2.5-bakeneko-32b-instruct-v2-gguf", "downloads": 975, "source": "Hugging Face", "score": -0.04518330717105579, "first_commit": "2025-03-20 14:15:25", "latest_commit": "2025-03-20 20:20:51", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": null, "multi_labels": [] }, { "description": "Dataset Summary RealPersonaChat は,話者本人のペルソナと性格特性を含む,約14,000件の日本語雑談対話からなるコーパスです.", "url": "https://huggingface.co/datasets/nu-dialogue/real-persona-chat", "project_name": "real-persona-chat", "downloads": 960, "source": "Hugging Face", "score": -0.04528979405647345, "first_commit": "2024-03-09 22:52:22", "latest_commit": "2024-03-13 10:26:42", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "ABEJA-Qwen2.5-7b-Japanese-v0.1-gguf abejaさんが公開しているABEJA-Qwen2.5-7b-Japanese-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ABEJA-Qwen2.5-7b-Japanese-v0.1-gguf", "project_name": "ABEJA-Qwen2.5-7b-Japanese-v0.1-gguf", "downloads": 931, "source": "Hugging Face", "score": -0.045495668701614284, "first_commit": "2025-04-17 12:18:38", "latest_commit": "2025-04-17 14:10:37", "languages": [], "model_or_dataset": "model", "model_size": 7.62, "model_architectures": null, "multi_labels": [] }, { "description": "PLaMo-13B-Instruct Model Description PLaMo-13B-Instruct is an instruct fine-tuned model built upon the 8192 context length version of PLaMo-13B text generation model.", "url": "https://huggingface.co/pfnet/plamo-13b-instruct", "project_name": "plamo-13b-instruct", "downloads": 923, "source": "Hugging Face", "score": -0.04555246170717037, "first_commit": "2023-10-26 02:11:24", "latest_commit": "2024-01-25 07:46:09", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": "PlamoForCausalLM", "multi_labels": [ "Dialogue Response Generation", "Text Generation", "Language Models" ] }, { "description": "合成日本語指示データセット 概要 このデータセットは、大規模言語モデル(LLM)を用いて自動生成された日本語の指示とそれに対する応答のコレクションです。", "url": "https://huggingface.co/datasets/DeL-TaiseiOzaki/magpie-llm-jp-3-13b-20k", "project_name": "magpie-llm-jp-3-13b-20k", "downloads": 920, "source": "Hugging Face", "score": -0.0455737590842539, "first_commit": "2024-10-13 03:40:26", "latest_commit": "2024-10-13 04:07:44", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": 
"aixsatoshi-Llama-3-8b-Cosmopedia-japanese-gguf aixsatoshiさんが公開しているLlama-3-8b-Cosmopedia-japaneseのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/aixsatoshi-Llama-3-8b-Cosmopedia-japanese-gguf", "project_name": "aixsatoshi-Llama-3-8b-Cosmopedia-japanese-gguf", "downloads": 919, "source": "Hugging Face", "score": -0.045580858209948415, "first_commit": "2024-05-01 12:36:43", "latest_commit": "2024-05-19 08:27:21", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese-LLaMA-3-8B Japanese-LLaMA-3-8Bは基盤モデル、フルモデルです。 ", "url": "https://huggingface.co/owner203/japanese-llama-3-8b", "project_name": "japanese-llama-3-8b", "downloads": 919, "source": "Hugging Face", "score": -0.045580858209948415, "first_commit": "2024-06-05 02:19:05", "latest_commit": "2024-06-21 06:35:41", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "このドキュメントの日本語版はまだ作成中です。", "url": "https://huggingface.co/bclavie/JaColBERT", "project_name": "JaColBERT", "downloads": 918, "source": "Hugging Face", "score": -0.04558795733564293, "first_commit": "2023-12-25 22:43:54", "latest_commit": "2024-01-27 15:30:00", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "HF_ColBERT", "multi_labels": [ "Information Retrieval", "Language Models", "Document Retrieval", "Semantic Text Processing" ] }, { "description": "Llama-3-ELYZA-JP-8B-AWQ Model Description Llama-3-ELYZA-JP-8B is a large language model trained by ELYZA, Inc.", "url": "https://huggingface.co/elyza/Llama-3-ELYZA-JP-8B-AWQ", "project_name": "Llama-3-ELYZA-JP-8B-AWQ", "downloads": 911, "source": "Hugging Face", "score": -0.045637651215504504, "first_commit": "2024-06-25 04:31:31", "latest_commit": "2024-06-26 02:56:39", "languages": [], "model_or_dataset": "model", "model_size": 1.98, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "japanese-large-lm-1.7b This repository provides a 1.7B parameters Japanese language model, trained by LINE Corporation.", "url": "https://huggingface.co/line-corporation/japanese-large-lm-1.7b", "project_name": "japanese-large-lm-1.7b", "downloads": 910, "source": "Hugging Face", "score": -0.04564475034119902, "first_commit": "2023-07-21 00:46:33", "latest_commit": "2023-08-17 01:06:37", "languages": [], "model_or_dataset": "model", "model_size": 1.75, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "calm3-22b-RP-v2-GGUF 概要 Aratako/calm3-22b-RP-v2の量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/calm3-22b-RP-v2-GGUF", "project_name": "calm3-22b-RP-v2-GGUF", "downloads": 906, "source": "Hugging Face", "score": -0.04567314684397706, "first_commit": "2024-09-16 04:30:57", "latest_commit": "2024-09-16 09:55:09", "languages": [], "model_or_dataset": "model", "model_size": 22.5, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "sarashina2.2-3b-instruct-v0.1-gguf sbintuitionsさんが公開しているsarashina2.2-3b-instruct-v0.1のggufフォーマット変換版です。 ", "url": 
"https://huggingface.co/mmnga/sarashina2.2-3b-instruct-v0.1-gguf", "project_name": "sarashina2.2-3b-instruct-v0.1-gguf", "downloads": 901, "source": "Hugging Face", "score": -0.04570864247244962, "first_commit": "2025-03-05 07:56:41", "latest_commit": "2025-03-05 16:20:38", "languages": [], "model_or_dataset": "model", "model_size": 3.36, "model_architectures": null, "multi_labels": [] }, { "description": "Overview This dataset provides a convenient and user-friendly format of data from Aozora Bunko (青空文庫), a website that compiles public-domain books in Japan, ideal for Machine Learning applications.", "url": "https://huggingface.co/datasets/globis-university/aozorabunko-clean", "project_name": "aozorabunko-clean", "downloads": 901, "source": "Hugging Face", "score": -0.04570864247244962, "first_commit": "2023-06-26 13:31:28", "latest_commit": "2023-10-27 13:22:32", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining" ] }, { "description": "ABEJA-QwQ32b-Reasoning-Japanese-v1.0-gguf abejaさんが公開しているABEJA-QwQ32b-Reasoning-Japanese-v1.0のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ABEJA-QwQ32b-Reasoning-Japanese-v1.0-gguf", "project_name": "ABEJA-QwQ32b-Reasoning-Japanese-v1.0-gguf", "downloads": 877, "source": "Hugging Face", "score": -0.04587902148911789, "first_commit": "2025-04-17 10:07:03", "latest_commit": "2025-04-17 18:13:15", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": null, "multi_labels": [ "Reasoning" ] }, { "description": "google/gemma-3-12b-it-qat-q4_0-unquantizedを日本語が多く含まれるimatrixを使って量子化したモデルですThis is a model that quantizes google/gemma-3-12b-it-qat-q4_0-unquantized using an imatrix that contains a lot of Japanese..", "url": "https://huggingface.co/dahara1/gemma-3-12b-it-qat-japanese-imatrix", "project_name": "gemma-3-12b-it-qat-japanese-imatrix", "downloads": 876, "source": "Hugging Face", "score": -0.0458861206148124, "first_commit": "2025-04-19 03:13:38", "latest_commit": "2025-04-23 02:07:19", "languages": [], "model_or_dataset": "model", "model_size": 11.8, "model_architectures": null, "multi_labels": [ "Multilinguality", "Natural Language Interfaces", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "Llama-3-Swallow-70B-Instruct-v0.1-gguf tokyotech-llmさんが公開しているLlama-3-Swallow-70B-Instruct-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Llama-3-Swallow-70B-Instruct-v0.1-gguf", "project_name": "Llama-3-Swallow-70B-Instruct-v0.1-gguf", "downloads": 875, "source": "Hugging Face", "score": -0.04589321974050691, "first_commit": "2024-07-01 14:21:29", "latest_commit": "2024-07-07 05:04:16", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "OpenCALM-1B Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", "url": "https://huggingface.co/cyberagent/open-calm-1b", "project_name": "open-calm-1b", "downloads": 874, "source": "Hugging Face", "score": -0.045900318866201426, "first_commit": "2023-05-15 07:00:18", "latest_commit": "2023-05-18 01:11:30", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ 
"Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Sarashina2-Vision-8B Sarashina2-Vision-8B is a Japanese Large Vision Language Model trained by SB Intuitions.", "url": "https://huggingface.co/sbintuitions/sarashina2-vision-8b", "project_name": "sarashina2-vision-8b", "downloads": 866, "source": "Hugging Face", "score": -0.045957111871757515, "first_commit": "2025-03-09 21:01:37", "latest_commit": "2025-03-27 02:30:58", "languages": [], "model_or_dataset": "model", "model_size": 7.99, "model_architectures": "Sarashina2VisionForCausalLM", "multi_labels": [ "Visual Data in NLP", "Language Models", "Multimodality" ] }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-70b-instruct-hf", "project_name": "Swallow-70b-instruct-hf", "downloads": 864, "source": "Hugging Face", "score": -0.045971310123146536, "first_commit": "2023-12-11 07:23:47", "latest_commit": "2024-06-29 08:56:31", "languages": [], "model_or_dataset": "model", "model_size": 69.2, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "haqishen-Llama-3-8B-Japanese-Instruct-gguf haqishenさんが公開しているLlama-3-8B-Japanese-Instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/haqishen-Llama-3-8B-Japanese-Instruct-gguf", "project_name": "haqishen-Llama-3-8B-Japanese-Instruct-gguf", "downloads": 861, "source": "Hugging Face", "score": -0.04599260750023007, "first_commit": "2024-04-23 13:55:17", "latest_commit": "2024-04-23 14:54:23", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese Stable LM Base Gamma 7B Model Description", "url": "https://huggingface.co/stabilityai/japanese-stablelm-base-gamma-7b", "project_name": "japanese-stablelm-base-gamma-7b", "downloads": 860, "source": "Hugging Face", "score": -0.045999706625924584, "first_commit": "2023-10-16 08:15:14", "latest_commit": "2024-01-25 08:05:12", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "About static quants of https://huggingface.co/nk2t/Llama-3-8B-Instruct-japanese-nk2t-v0.2 weighted/imatrix quants are available at https://huggingface.co/mradermacher/Llama-3-8B-Instruct-japanese-nk2t-v0.2-i1-GGUF Usage If you are unsure how to use GGUF files, refer to one of TheBloke's READMEs for more details, including on how to concatenate multi-part files.", "url": "https://huggingface.co/mradermacher/Llama-3-8B-Instruct-japanese-nk2t-v0.2-GGUF", "project_name": "Llama-3-8B-Instruct-japanese-nk2t-v0.2-GGUF", "downloads": 854, "source": "Hugging Face", "score": -0.046042301380091646, "first_commit": "2025-01-01 01:18:12", "latest_commit": "2025-01-02 01:18:45", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "CyberAgentLM2-7B (CALM2-7B)", "url": "https://huggingface.co/cyberagent/calm2-7b", "project_name": "calm2-7b", "downloads": 852, 
"source": "Hugging Face", "score": -0.046056499631480674, "first_commit": "2023-11-01 07:24:59", "latest_commit": "2023-11-02 05:46:18", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "J-Moshi: A Japanese Full-duplex Spoken Dialogue System J-Moshiは,日本語におけるfull-duplex音声対話システムです.", "url": "https://huggingface.co/nu-dialogue/j-moshi-ext", "project_name": "j-moshi-ext", "downloads": 849, "source": "Hugging Face", "score": -0.04607779700856421, "first_commit": "2025-01-21 15:24:48", "latest_commit": "2025-02-15 03:04:38", "languages": [], "model_or_dataset": "model", "model_size": 7.69, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "Llama-3-Swallow-8B-Instruct-v0.1-gguf tokyotech-llmさんが公開しているLlama-3-Swallow-8B-Instruct-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Llama-3-Swallow-8B-Instruct-v0.1-gguf", "project_name": "Llama-3-Swallow-8B-Instruct-v0.1-gguf", "downloads": 848, "source": "Hugging Face", "score": -0.046084896134258715, "first_commit": "2024-07-01 16:42:54", "latest_commit": "2024-07-02 10:43:55", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "OpenCALM-3B Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", "url": "https://huggingface.co/cyberagent/open-calm-3b", "project_name": "open-calm-3b", "downloads": 844, "source": "Hugging Face", "score": -0.046113292637036764, "first_commit": "2023-05-15 07:14:36", "latest_commit": "2023-05-18 01:11:50", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "KMNIST Dataset lassify images from the KMNIST dataset into one of the 10 classes, representing different Japanese characters.", "url": "https://huggingface.co/datasets/tanganke/kmnist", "project_name": "kmnist", "downloads": 835, "source": "Hugging Face", "score": -0.04617718476828737, "first_commit": "2024-04-26 10:34:09", "latest_commit": "2024-05-02 13:59:20", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "umiyuki-Japanese-Chat-Umievo-itr001-7b-gguf umiyukiさんが公開しているJapanese-Chat-Umievo-itr001-7bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/umiyuki-Japanese-Chat-Umievo-itr001-7b-gguf", "project_name": "umiyuki-Japanese-Chat-Umievo-itr001-7b-gguf", "downloads": 830, "source": "Hugging Face", "score": -0.04621268039675992, "first_commit": "2024-04-27 09:55:39", "latest_commit": "2024-04-27 10:52:17", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Ruri: Japanese General Text Embeddings Ruri v3 is a general-purpose Japanese text embedding model built on top of ModernBERT-Ja.", "url": "https://huggingface.co/cl-nagoya/ruri-v3-70m", "project_name": 
"ruri-v3-70m", "downloads": 828, "source": "Hugging Face", "score": -0.04622687864814894, "first_commit": "2025-04-09 04:06:36", "latest_commit": "2025-04-17 09:37:38", "languages": [], "model_or_dataset": "model", "model_size": 0.07, "model_architectures": "ModernBertModel", "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "hubert-large-asr", "url": "https://huggingface.co/TKU410410103/hubert-large-japanese-asr", "project_name": "hubert-large-japanese-asr", "downloads": 825, "source": "Hugging Face", "score": -0.04624817602523248, "first_commit": "2024-04-09 03:01:08", "latest_commit": "2024-04-14 13:21:01", "languages": [], "model_or_dataset": "model", "model_size": 0.316, "model_architectures": "HubertForCTC", "multi_labels": [ "Speech Recognition", "Text Generation", "Language Models", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "alfredplpl-Llama-3-8B-Instruct-Ja-gguf alfredplplさんが公開しているLlama-3-8B-Instruct-Jaのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/alfredplpl-Llama-3-8B-Instruct-Ja-gguf", "project_name": "alfredplpl-Llama-3-8B-Instruct-Ja-gguf", "downloads": 823, "source": "Hugging Face", "score": -0.0462623742766215, "first_commit": "2024-04-23 14:18:57", "latest_commit": "2024-04-23 15:24:47", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese SimCSE (BERT-base)", "url": "https://huggingface.co/pkshatech/simcse-ja-bert-base-clcmlp", "project_name": "simcse-ja-bert-base-clcmlp", "downloads": 820, "source": "Hugging Face", "score": -0.04628367165370503, "first_commit": "2022-12-26 02:52:03", "latest_commit": "2023-01-27 06:44:23", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertModel", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "本モデルについて About this model.", "url": "https://huggingface.co/dahara1/gemma-2-2b-jpn-it-gguf-japanese-imatrix", "project_name": "gemma-2-2b-jpn-it-gguf-japanese-imatrix", "downloads": 816, "source": "Hugging Face", "score": -0.04631206815648308, "first_commit": "2024-10-03 14:27:12", "latest_commit": "2024-10-03 15:56:59", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ArrowPro-7B-KillerWhale-gguf DataPilotさんが公開しているArrowPro-7B-KillerWhaleのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ArrowPro-7B-KillerWhale-gguf", "project_name": "ArrowPro-7B-KillerWhale-gguf", "downloads": 815, "source": "Hugging Face", "score": -0.04631916728217759, "first_commit": "2024-05-29 15:06:55", "latest_commit": "2024-05-29 15:53:17", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Tanuki-8B-dpo-v1.0-AWQ 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8B-dpo-v1.0のAWQ 4bit量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8B-dpo-v1.0-AWQ", "project_name": "Tanuki-8B-dpo-v1.0-AWQ", "downloads": 813, "source": "Hugging Face", "score": -0.046333365533566616, "first_commit": "2024-08-27 04:50:35", "latest_commit": "2024-09-03 09:29:23", "languages": [], 
"model_or_dataset": "model", "model_size": 1.47, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "shisa-7b-v1-gguf augmxntさんが公開しているshisa-7b-v1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/shisa-7b-v1-gguf", "project_name": "shisa-7b-v1-gguf", "downloads": 811, "source": "Hugging Face", "score": -0.04634756378495564, "first_commit": "2023-12-09 14:02:20", "latest_commit": "2023-12-10 12:24:25", "languages": [], "model_or_dataset": "model", "model_size": 7.96, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "YuisekinAIEvol-Mistral-7B-ja-math-v0.1.1-gguf yuisekiさんが公開しているYuisekinAIEvol-Mistral-7B-ja-math-v0.1.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/YuisekinAIEvol-Mistral-7B-ja-math-v0.1.1-gguf", "project_name": "YuisekinAIEvol-Mistral-7B-ja-math-v0.1.1-gguf", "downloads": 811, "source": "Hugging Face", "score": -0.04634756378495564, "first_commit": "2024-04-29 14:18:07", "latest_commit": "2024-04-29 15:52:08", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "About weighted/imatrix quants of https://huggingface.co/abeja/ABEJA-Qwen2.5-7b-Japanese-v0.1 static quants are available at https://huggingface.co/mradermacher/ABEJA-Qwen2.5-7b-Japanese-v0.1-GGUF Usage If you are unsure how to use GGUF files, refer to one of TheBloke's READMEs for more details, including on how to concatenate multi-part files.", "url": "https://huggingface.co/mradermacher/ABEJA-Qwen2.5-7b-Japanese-v0.1-i1-GGUF", "project_name": "ABEJA-Qwen2.5-7b-Japanese-v0.1-i1-GGUF", "downloads": 798, "source": "Hugging Face", "score": -0.04643985241898428, "first_commit": "2025-04-22 07:00:21", "latest_commit": "2025-04-24 00:47:05", "languages": [], "model_or_dataset": "model", "model_size": 7.62, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "What is this?", "url": "https://huggingface.co/grapevine-AI/gemma-2-2b-jpn-it-gguf", "project_name": "gemma-2-2b-jpn-it-gguf", "downloads": 793, "source": "Hugging Face", "score": -0.04647534804745684, "first_commit": "2024-10-03 11:25:27", "latest_commit": "2024-10-03 11:53:48", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Yandere2023:", "url": "https://huggingface.co/datasets/nyanko7/yandere2023", "project_name": "yandere2023", "downloads": 793, "source": "Hugging Face", "score": -0.04647534804745684, "first_commit": "2024-01-07 10:31:53", "latest_commit": "2024-05-06 08:22:23", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ELYZA-japanese-Llama-2-7b Model Description ELYZA-japanese-Llama-2-7b は、 Llama2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-7b-fast", "project_name": "ELYZA-japanese-Llama-2-7b-fast", "downloads": 790, "source": "Hugging Face", "score": -0.04649664542454037, "first_commit": "2023-08-28 13:17:58", "latest_commit": "2023-08-29 03:46:37", 
"languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "rinna-llama-3-youko-8b-gguf rinnaさんが公開しているllama-3-youko-8bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/rinna-llama-3-youko-8b-gguf", "project_name": "rinna-llama-3-youko-8b-gguf", "downloads": 788, "source": "Hugging Face", "score": -0.04651084367592939, "first_commit": "2024-05-01 14:17:53", "latest_commit": "2024-05-01 15:11:21", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "gemma-2-2b-it-gguf googleさんが公開しているgemma-2-2b-itのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/gemma-2-2b-it-gguf", "project_name": "gemma-2-2b-it-gguf", "downloads": 788, "source": "Hugging Face", "score": -0.04651084367592939, "first_commit": "2024-08-01 17:22:58", "latest_commit": "2024-08-01 18:29:08", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "This dataset was created by automatically translating \"databricks-dolly-15k\" into Japanese.", "url": "https://huggingface.co/datasets/kunishou/databricks-dolly-15k-ja", "project_name": "databricks-dolly-15k-ja", "downloads": 786, "source": "Hugging Face", "score": -0.04652504192731842, "first_commit": "2023-04-13 08:31:08", "latest_commit": "2024-04-01 17:26:37", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "Rakuda - Questions for Japanese models Repository:", "url": "https://huggingface.co/datasets/yuzuai/rakuda-questions", "project_name": "rakuda-questions", "downloads": 784, "source": "Hugging Face", "score": -0.04653924017870744, "first_commit": "2023-06-23 01:08:54", "latest_commit": "2023-06-23 08:01:35", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Question Answering" ] }, { "description": "Moonlight-16B-A3B-Instruct-gguf moonshotaiさんが公開しているMoonlight-16B-A3B-Instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Moonlight-16B-A3B-Instruct-gguf", "project_name": "Moonlight-16B-A3B-Instruct-gguf", "downloads": 782, "source": "Hugging Face", "score": -0.04655343843009646, "first_commit": "2025-02-23 17:11:42", "latest_commit": "2025-02-23 19:50:28", "languages": [], "model_or_dataset": "model", "model_size": 16.0, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "Kotoba-Whisper-Bilingual (v1.0)", "url": "https://huggingface.co/kotoba-tech/kotoba-whisper-bilingual-v1.0", "project_name": "kotoba-whisper-bilingual-v1.0", "downloads": 781, "source": "Hugging Face", "score": -0.046560537555790975, "first_commit": "2024-09-27 06:18:39", "latest_commit": "2024-09-30 06:57:43", "languages": [], "model_or_dataset": "model", "model_size": 0.756, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [ "Multilinguality", "Speech Recognition", "Text Generation", "Machine Translation", "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset 
Development" ] }, { "description": "JMMLU Japanese Massive Multitask Language Understanding Benchmark JMMLU is a four-choice question set consisting of Japanese-translated questions of a portion of MMLU (Paper, Github) (Translated questions) and questions based on unique Japanese cultural context (Japanese questions).", "url": "https://huggingface.co/datasets/nlp-waseda/JMMLU", "project_name": "JMMLU", "downloads": 780, "source": "Hugging Face", "score": -0.04656763668148548, "first_commit": "2024-02-09 12:19:13", "latest_commit": "2024-02-27 05:22:30", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models" ] }, { "description": "ELYZA-japanese-CodeLlama-7b-gguf ELYZAさんが公開しているELYZA-japanese-CodeLlama-7b-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ELYZA-japanese-CodeLlama-7b-gguf", "project_name": "ELYZA-japanese-CodeLlama-7b-gguf", "downloads": 768, "source": "Hugging Face", "score": -0.04665282618981962, "first_commit": "2023-11-15 09:53:42", "latest_commit": "2023-11-16 14:28:03", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Japanese-StableLM-Base-Beta-70B A cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-base-beta-70b is a 70B-parameter decoder-only language model based on Llama-2-70b that has been fine-tuned on a diverse collection of Japanese data, with the intent of maximizing downstream performance on Japanese language tasks.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-base-beta-70b", "project_name": "japanese-stablelm-base-beta-70b", "downloads": 767, "source": "Hugging Face", "score": -0.046659925315514134, "first_commit": "2023-10-30 07:46:28", "latest_commit": "2023-12-19 06:44:53", "languages": [], "model_or_dataset": "model", "model_size": 69.0, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "Polyglot-math-4x7b-24b Polyglot-4x7b is a Mixture of Experts approach to a multilingual model.", "url": "https://huggingface.co/macadeliccc/polyglot-math-4x7b", "project_name": "polyglot-math-4x7b", "downloads": 761, "source": "Hugging Face", "score": -0.0467025200696812, "first_commit": "2024-01-13 03:05:44", "latest_commit": "2024-03-04 19:25:12", "languages": [], "model_or_dataset": "model", "model_size": 24.2, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Reasoning" ] }, { "description": "Japanese Stable LM Instruct Gamma 7B Model Description", "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-gamma-7b", "project_name": "japanese-stablelm-instruct-gamma-7b", "downloads": 751, "source": "Hugging Face", "score": -0.04677351132662631, "first_commit": "2023-10-16 08:55:06", "latest_commit": "2024-01-24 05:54:38", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Responsible & Trustworthy NLP", "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "ELYZA-japanese-Llama-2-7b-gguf ELYZAさんが公開しているELYZA-japanese-Llama-2-7bのggufフォーマット変換版です。 ", "url": 
"https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-7b-gguf", "project_name": "ELYZA-japanese-Llama-2-7b-gguf", "downloads": 750, "source": "Hugging Face", "score": -0.04678061045232083, "first_commit": "2023-08-29 06:32:01", "latest_commit": "2023-11-16 14:27:12", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "tweet-topic-large-multilingual This model is based on cardiffnlp/twitter-xlm-roberta-large-2022 language model and isfinetuned for multi-label topic classification in English, Spanish, Japanese, and Greek.", "url": "https://huggingface.co/cardiffnlp/tweet-topic-large-multilingual", "project_name": "tweet-topic-large-multilingual", "downloads": 745, "source": "Hugging Face", "score": -0.04681610608079338, "first_commit": "2024-10-04 01:01:39", "latest_commit": "2024-11-26 11:41:39", "languages": [], "model_or_dataset": "model", "model_size": 0.56, "model_architectures": "XLMRobertaForSequenceClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese-StableLM-Instruct-Alpha-7B-v2 \"A parrot able to speak Japanese, ukiyoe, edo period\" — Stable Diffusion XL Model Description japanese-stablelm-instruct-alpha-7b-v2 is a 7B parameter decoder-only language models pre-trained built on top of the Japanese-StableLM-Base-Alpha-7B model and further fine-tuned on various instruction-following datasets.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-alpha-7b-v2", "project_name": "japanese-stablelm-instruct-alpha-7b-v2", "downloads": 740, "source": "Hugging Face", "score": -0.04685160170926594, "first_commit": "2023-10-06 08:40:24", "latest_commit": "2023-10-06 08:40:24", "languages": [], "model_or_dataset": "model", "model_size": 7.01, "model_architectures": "JapaneseStableLMAlphaForCausalLM", "multi_labels": [ "Text Generation", "Language Models" ] }, { "description": "rinna/nekomata-7b Overview We conduct continual pre-training of qwen-7b on 30B tokens from a mixture of Japanese and English datasets.", "url": "https://huggingface.co/rinna/nekomata-7b", "project_name": "nekomata-7b", "downloads": 738, "source": "Hugging Face", "score": -0.04686579996065496, "first_commit": "2023-12-19 06:58:44", "latest_commit": "2024-07-20 08:35:21", "languages": [], "model_or_dataset": "model", "model_size": 7.72, "model_architectures": "QWenLMHeadModel", "multi_labels": [] }, { "description": "Tanuki-8B-dpo-v1.0 モデルについて Tanuki-8Bは、フルスクラッチで約1.3Tトークン事前学習を行った約8Bパラメータの大規模言語モデルです。", "url": "https://huggingface.co/weblab-GENIAC/Tanuki-8B-dpo-v1.0", "project_name": "Tanuki-8B-dpo-v1.0", "downloads": 732, "source": "Hugging Face", "score": -0.04690839471482203, "first_commit": "2024-08-12 12:47:52", "latest_commit": "2024-09-02 23:47:02", "languages": [], "model_or_dataset": "model", "model_size": 7.51, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "ABEJA-Qwen2.5-32b-Japanese-v0.1 ABEJA-Qwen2.5-32b-Japanese-v0.1はQwen/Qwen2.5-32B-Instructをベースに日本語中心とした継続事前学習を実施したモデルです。", "url": "https://huggingface.co/abeja/ABEJA-Qwen2.5-32b-Japanese-v0.1", "project_name": "ABEJA-Qwen2.5-32b-Japanese-v0.1", "downloads": 732, "source": "Hugging Face", "score": -0.04690839471482203, "first_commit": "2025-01-17 01:49:07", "latest_commit": "2025-01-30 07:25:45", "languages": [], 
"model_or_dataset": "model", "model_size": 32.8, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "tokyotech-llm-Swallow-70b-instruct-v0.1-gguf tokyotech-llmさんが公開しているSwallow-70b-instruct-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/tokyotech-llm-Swallow-70b-instruct-v0.1-gguf", "project_name": "tokyotech-llm-Swallow-70b-instruct-v0.1-gguf", "downloads": 732, "source": "Hugging Face", "score": -0.04690839471482203, "first_commit": "2024-05-03 09:00:00", "latest_commit": "2024-05-04 06:52:16", "languages": [], "model_or_dataset": "model", "model_size": 69.2, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "calm3-22b-RP-GGUF 概要 Aratako/calm3-22b-RPの量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/calm3-22b-RP-GGUF", "project_name": "calm3-22b-RP-GGUF", "downloads": 731, "source": "Hugging Face", "score": -0.04691549384051654, "first_commit": "2024-08-21 01:13:32", "latest_commit": "2024-08-21 13:26:35", "languages": [], "model_or_dataset": "model", "model_size": 22.5, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "DeepSeek-R1-Distill-Qwen-14B-Japanese-gguf cyberagent/DeepSeek-R1-Distill-Qwen-14B-Japanese License MIT License 👉 DeepSeek-R1-Distill-Qwen-32B-Japanese-gguf こっちのがいいかも👉 mmnga/cyberagent-DeepSeek-R1-Distill-Qwen-14B-Japanese-gguf", "url": "https://huggingface.co/bluepen5805/DeepSeek-R1-Distill-Qwen-14B-Japanese-gguf", "project_name": "DeepSeek-R1-Distill-Qwen-14B-Japanese-gguf", "downloads": 729, "source": "Hugging Face", "score": -0.04692969209190556, "first_commit": "2025-01-27 08:48:59", "latest_commit": "2025-01-28 04:13:43", "languages": [], "model_or_dataset": "model", "model_size": 14.8, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "Mistral-nemoをEPR用途向けにファインチューニングしたモデルです 使用したデータセットの半分ほどが日本語なのでmagnumのようなモデルよりも日本語には強いはず? 
", "url": "https://huggingface.co/ascktgcc/Mistral-nemo-ja-rp-v0.1", "project_name": "Mistral-nemo-ja-rp-v0.1", "downloads": 727, "source": "Hugging Face", "score": -0.04694389034329458, "first_commit": "2024-10-16 13:30:25", "latest_commit": "2024-10-20 02:18:19", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "QuantFactory/TinySlime-1.1B-Chat-v1.0-GGUF", "url": "https://huggingface.co/QuantFactory/TinySlime-1.1B-Chat-v1.0-GGUF", "project_name": "TinySlime-1.1B-Chat-v1.0-GGUF", "downloads": 726, "source": "Hugging Face", "score": -0.046950989468989096, "first_commit": "2024-09-11 05:27:47", "latest_commit": "2024-09-11 05:55:46", "languages": [], "model_or_dataset": "model", "model_size": 1.1, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Phi-3-mini-128k-instruct-gguf microsoftさんが公開しているPhi-3-mini-128k-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Phi-3-mini-128k-instruct-gguf", "project_name": "Phi-3-mini-128k-instruct-gguf", "downloads": 725, "source": "Hugging Face", "score": -0.0469580885946836, "first_commit": "2024-04-24 13:50:51", "latest_commit": "2024-04-24 14:24:09", "languages": [], "model_or_dataset": "model", "model_size": 3.82, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "japanese-gpt-neox-3.6b-instruction-sft-v2 Overview", "url": "https://huggingface.co/rinna/japanese-gpt-neox-3.6b-instruction-sft-v2", "project_name": "japanese-gpt-neox-3.6b-instruction-sft-v2", "downloads": 724, "source": "Hugging Face", "score": -0.04696518772037812, "first_commit": "2023-05-30 01:50:25", "latest_commit": "2024-07-20 07:57:35", "languages": [], "model_or_dataset": "model", "model_size": 3.76, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Dialogue Systems & Conversational Agents", "Language Models", "Semantic Text Processing" ] }, { "description": "deberta-base-japanese-aozora-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-aozora-ud-head", "project_name": "deberta-base-japanese-aozora-ud-head", "downloads": 722, "source": "Hugging Face", "score": -0.04697938597176714, "first_commit": "2022-06-15 04:02:27", "latest_commit": "2023-03-04 20:10:16", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForQuestionAnswering", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Semantic Text Processing" ] }, { "description": "日本語のデータセットを SentenceTransformes で学習しやすいカラム名と構造に変換したもの。 ", "url": "https://huggingface.co/datasets/hotchpotch/sentence_transformer_japanese", "project_name": "sentence_transformer_japanese", "downloads": 721, "source": "Hugging Face", "score": -0.04698648509746165, "first_commit": "2025-01-19 08:23:26", "latest_commit": "2025-01-20 10:19:14", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Normalization" ] }, { "description": "gemma-3-4b-it-RP-v0.1-GGUF 概要 Aratako/gemma-3-4b-it-RP-v0.1のGGUF量子化モデルです。", "url": "https://huggingface.co/Aratako/gemma-3-4b-it-RP-v0.1-GGUF", "project_name": "gemma-3-4b-it-RP-v0.1-GGUF", "downloads": 709, "source": "Hugging Face", "score": 
"first_commit": "2025-04-08 00:58:15", "latest_commit": "2025-04-09 03:18:20", "languages": [], "model_or_dataset": "model", "model_size": 4.55, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "About this model.", "url": "https://huggingface.co/dahara1/Qwen2.5-3B-Instruct-gguf-japanese-imatrix-128K", "project_name": "Qwen2.5-3B-Instruct-gguf-japanese-imatrix-128K", "downloads": 700, "source": "Hugging Face", "score": -0.047135566737046386, "first_commit": "2024-11-15 02:50:24", "latest_commit": "2024-11-17 11:43:29", "languages": [], "model_or_dataset": "model", "model_size": 3.09, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Summarization", "Text Generation" ] }, { "description": "Llama3-ArrowSE-8B-v0.3-gguf A gguf-format conversion of Llama3-ArrowSE-8B-v0.3 published by DataPilot. ", "url": "https://huggingface.co/mmnga/Llama3-ArrowSE-8B-v0.3-gguf", "project_name": "Llama3-ArrowSE-8B-v0.3-gguf", "downloads": 700, "source": "Hugging Face", "score": -0.047135566737046386, "first_commit": "2024-07-07 07:27:12", "latest_commit": "2024-07-07 09:30:16", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "rinna/nekomata-14b Overview We conduct continual pre-training of qwen-14b on 66B tokens from a mixture of Japanese and English datasets.", "url": "https://huggingface.co/rinna/nekomata-14b", "project_name": "nekomata-14b", "downloads": 697, "source": "Hugging Face", "score": -0.04715686411412992, "first_commit": "2023-12-19 08:09:53", "latest_commit": "2024-07-22 07:58:40", "languages": [], "model_or_dataset": "model", "model_size": 14.2, "model_architectures": "QWenLMHeadModel", "multi_labels": [] }, { "description": "cogito-v1-preview-qwen-32B-gguf A gguf-format conversion of cogito-v1-preview-qwen-32B published by deepcogito. ", "url": "https://huggingface.co/mmnga/cogito-v1-preview-qwen-32B-gguf", "project_name": "cogito-v1-preview-qwen-32B-gguf", "downloads": 697, "source": "Hugging Face", "score": -0.04715686411412992, "first_commit": "2025-04-09 03:35:55", "latest_commit": "2025-04-09 10:46:41", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": null, "multi_labels": [] }, { "description": "QuantFactory/plamo-13b-GGUF", "url": "https://huggingface.co/QuantFactory/plamo-13b-GGUF", "project_name": "plamo-13b-GGUF", "downloads": 697, "source": "Hugging Face", "score": -0.04715686411412992, "first_commit": "2024-10-21 02:41:41", "latest_commit": "2024-10-21 04:14:39", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Sarashina1-13B", "url": "https://huggingface.co/sbintuitions/sarashina1-13b", "project_name": "sarashina1-13b", "downloads": 691, "source": "Hugging Face", "score": -0.04719945886829699, "first_commit": "2024-06-07 11:56:53", "latest_commit": "2024-06-27 06:56:06", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Fish Speech V1.2 Fish Speech V1.2 is a leading text-to-speech (TTS) model trained on 300k hours of English, Chinese, and Japanese audio data.", "url": "https://huggingface.co/fishaudio/fish-speech-1.2-sft", "project_name": "fish-speech-1.2-sft", "downloads": 690, "source": "Hugging Face", "score": -0.0472065579939915, "first_commit": "2024-07-18 08:00:29", "latest_commit": "2024-08-02 08:13:06", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ELYZA-japanese-Llama-2-13b-fast Model Description ELYZA-japanese-Llama-2-13b is a model that has undergone additional pre-training on top of Llama 2 to extend its Japanese language capabilities. ", "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-13b-fast", "project_name": "ELYZA-japanese-Llama-2-13b-fast", "downloads": 686, "source": "Hugging Face", "score": -0.047234954496769545, "first_commit": "2023-12-25 17:14:45", "latest_commit": "2023-12-27 01:41:31", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Sarashina1-65B", "url": "https://huggingface.co/sbintuitions/sarashina1-65b", "project_name": "sarashina1-65b", "downloads": 685, "source": "Hugging Face", "score": -0.04724205362246406, "first_commit": "2024-06-07 11:57:56", "latest_commit": "2024-06-27 06:56:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Umievo-itr012-Gleipnir-7B This model is an evolutionary merge of four strong Japanese models using an evolutionary algorithm.", "url": "https://huggingface.co/umiyuki/Umievo-itr012-Gleipnir-7B", "project_name": "Umievo-itr012-Gleipnir-7B", "downloads": 683, "source": "Hugging Face", "score": -0.04725625187385308, "first_commit": "2024-05-29 12:32:29", "latest_commit": "2024-05-29 13:51:31", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "ELYZA-japanese-Llama-2-13b-fast-instruct Model Description ELYZA-japanese-Llama-2-13b is a model that has undergone additional pre-training on top of Llama 2 to extend its Japanese language capabilities. ", "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-13b-fast-instruct", "project_name": "ELYZA-japanese-Llama-2-13b-fast-instruct", "downloads": 683, "source": "Hugging Face", "score": -0.04725625187385308, "first_commit": "2023-12-25 18:14:10", "latest_commit": "2023-12-27 01:41:51", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "ABEJA-CC-JA This dataset is an HF mirror of https://registry.opendata.aws/abeja-cc-ja/ Please refer to https://tech-blog.abeja.asia/entry/abeja-cc-ja-202409 ", "url": "https://huggingface.co/datasets/kajuma/ABEJA-CC-JA", "project_name": "ABEJA-CC-JA", "downloads": 683, "source": "Hugging Face", "score": -0.04725625187385308, "first_commit": "2024-11-03 05:37:36", "latest_commit": "2024-12-06 07:22:40", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "RakutenAI-7B-instruct Model Description RakutenAI-7B is a systematic initiative that brings the latest technologies to the world of Japanese LLMs.",
"url": "https://huggingface.co/Rakuten/RakutenAI-7B-instruct", "project_name": "RakutenAI-7B-instruct", "downloads": 680, "source": "Hugging Face", "score": -0.047277549250936614, "first_commit": "2024-03-19 09:16:58", "latest_commit": "2025-02-10 07:24:59", "languages": [], "model_or_dataset": "model", "model_size": 7.37, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Text Normalization" ] }, { "description": "日本語T5事前学習済みモデル This is a T5 (Text-to-Text Transfer Transformer) model pretrained on Japanese corpus. ", "url": "https://huggingface.co/sonoisa/t5-base-japanese-v1.1", "project_name": "t5-base-japanese-v1.1", "downloads": 675, "source": "Hugging Face", "score": -0.04731304487940917, "first_commit": "2022-08-12 15:41:28", "latest_commit": "2022-08-27 09:21:01", "languages": [], "model_or_dataset": "model", "model_size": 0.248, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "c4ai-command-r-plus-gguf CohereForAIさんが公開しているc4ai-command-r-plusのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/c4ai-command-r-plus-gguf", "project_name": "c4ai-command-r-plus-gguf", "downloads": 675, "source": "Hugging Face", "score": -0.04731304487940917, "first_commit": "2024-04-22 14:46:41", "latest_commit": "2024-04-23 16:13:37", "languages": [], "model_or_dataset": "model", "model_size": 104.0, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "japanese-gpt-neox-small This repository provides a small-sized Japanese GPT-NeoX model.", "url": "https://huggingface.co/rinna/japanese-gpt-neox-small", "project_name": "japanese-gpt-neox-small", "downloads": 673, "source": "Hugging Face", "score": -0.04732724313079819, "first_commit": "2022-08-31 05:58:25", "latest_commit": "2024-07-20 07:53:40", "languages": [], "model_or_dataset": "model", "model_size": 0.20400000000000001, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "DataPilot-ArrowPro-7B-RobinHood-gguf DataPilotさんが公開しているArrowPro-7B-RobinHoodのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/DataPilot-ArrowPro-7B-RobinHood-gguf", "project_name": "DataPilot-ArrowPro-7B-RobinHood-gguf", "downloads": 672, "source": "Hugging Face", "score": -0.047334342256492704, "first_commit": "2024-05-11 07:22:37", "latest_commit": "2024-05-11 13:43:09", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Phi-3-medium-128k-instruct-gguf microsoftさんが公開しているPhi-3-medium-128k-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Phi-3-medium-128k-instruct-gguf", "project_name": "Phi-3-medium-128k-instruct-gguf", "downloads": 669, "source": "Hugging Face", "score": -0.04735563963357624, "first_commit": "2024-05-22 15:27:33", "latest_commit": "2024-05-22 16:56:55", "languages": [], "model_or_dataset": "model", "model_size": 14.0, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "stockmark/stockmark-13b Stockmark-13b is a 13 billion parameter LLM pretrained from scratch based on Japanese corpus of about 220B tokens.", "url": 
"https://huggingface.co/stockmark/stockmark-13b", "project_name": "stockmark-13b", "downloads": 667, "source": "Hugging Face", "score": -0.04736983788496526, "first_commit": "2023-10-21 06:53:06", "latest_commit": "2024-05-17 06:15:56", "languages": [], "model_or_dataset": "model", "model_size": 13.2, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "Llama-3.1-70B-EZO-1.1-it-gguf HODACHIさんが公開しているLlama-3.1-70B-EZO-1.1-itのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Llama-3.1-70B-EZO-1.1-it-gguf", "project_name": "Llama-3.1-70B-EZO-1.1-it-gguf", "downloads": 667, "source": "Hugging Face", "score": -0.04736983788496526, "first_commit": "2024-07-31 12:12:13", "latest_commit": "2024-07-31 21:47:25", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ryota39-Phi-3-mini-4k-instruct-dpo-gguf ryota39さんが公開しているPhi-3-mini-4k-instruct-dpoのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ryota39-Phi-3-mini-4k-instruct-dpo-gguf", "project_name": "ryota39-Phi-3-mini-4k-instruct-dpo-gguf", "downloads": 664, "source": "Hugging Face", "score": -0.04739113526204879, "first_commit": "2024-04-29 14:27:31", "latest_commit": "2024-04-29 16:53:45", "languages": [], "model_or_dataset": "model", "model_size": 3.82, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese StableLM-3B-4E1T Instruct Model Description", "url": "https://huggingface.co/stabilityai/japanese-stablelm-3b-4e1t-instruct", "project_name": "japanese-stablelm-3b-4e1t-instruct", "downloads": 661, "source": "Hugging Face", "score": -0.04741243263913233, "first_commit": "2023-10-16 07:50:31", "latest_commit": "2024-04-26 03:20:42", "languages": [], "model_or_dataset": "model", "model_size": 2.8, "model_architectures": "StableLMEpochForCausalLM", "multi_labels": [ "Responsible & Trustworthy NLP", "Syntactic Text Processing", "Language Models", "Robustness in NLP" ] }, { "description": "Sarashina1-7B This repository provides Japanese language models trained by SB Intuitions.", "url": "https://huggingface.co/sbintuitions/sarashina1-7b", "project_name": "sarashina1-7b", "downloads": 661, "source": "Hugging Face", "score": -0.04741243263913233, "first_commit": "2024-06-07 10:13:21", "latest_commit": "2024-06-27 06:55:38", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "日本語はこちら lightblue/DeepSeek-R1-Distill-Qwen-7B-Japanese Deepseek's R1 models are excellent, state-of-the-art reasoning models which have been trained to work bilingually, with English and Chinese.", "url": "https://huggingface.co/lightblue/DeepSeek-R1-Distill-Qwen-7B-Japanese", "project_name": "DeepSeek-R1-Distill-Qwen-7B-Japanese", "downloads": 656, "source": "Hugging Face", "score": -0.04744792826760488, "first_commit": "2025-01-24 08:54:39", "latest_commit": "2025-01-27 08:14:48", "languages": [], "model_or_dataset": "model", "model_size": 7.62, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Reasoning", "Language Models", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "日本語版CLIPモデル This is a CLIP text/image encoder model for Japanese. 
", "url": "https://huggingface.co/sonoisa/clip-vit-b-32-japanese-v1", "project_name": "clip-vit-b-32-japanese-v1", "downloads": 655, "source": "Hugging Face", "score": -0.0474550273932994, "first_commit": "2022-02-15 15:47:34", "latest_commit": "2022-04-19 14:18:58", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertModel", "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "tokyotech-llm-Llama-3.1-Swallow-8B-Instruct-v0.3-gguf tokyotech-llmさんが公開しているLlama-3.1-Swallow-8B-Instruct-v0.3のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/tokyotech-llm-Llama-3.1-Swallow-8B-Instruct-v0.3-gguf", "project_name": "tokyotech-llm-Llama-3.1-Swallow-8B-Instruct-v0.3-gguf", "downloads": 653, "source": "Hugging Face", "score": -0.04746922564468842, "first_commit": "2024-12-24 00:51:41", "latest_commit": "2024-12-24 03:10:26", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Please feel free to open an issue or pull request. ", "url": "https://huggingface.co/datasets/kumapo/JAQKET", "project_name": "JAQKET", "downloads": 653, "source": "Hugging Face", "score": -0.04746922564468842, "first_commit": "2023-06-21 13:04:38", "latest_commit": "2023-10-09 06:44:28", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Annotation and Dataset Development" ] }, { "description": "llm-jp-3-7.2b-instruct3-gguf llm-jpさんが公開しているllm-jp-3-7.2b-instruct3のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/llm-jp-3-7.2b-instruct3-gguf", "project_name": "llm-jp-3-7.2b-instruct3-gguf", "downloads": 652, "source": "Hugging Face", "score": -0.04747632477038293, "first_commit": "2025-02-05 10:23:48", "latest_commit": "2025-02-05 13:35:27", "languages": [], "model_or_dataset": "model", "model_size": 7.29, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-70b-NVE-hf", "project_name": "Swallow-70b-NVE-hf", "downloads": 648, "source": "Hugging Face", "score": -0.04750472127316097, "first_commit": "2023-12-07 07:34:35", "latest_commit": "2024-06-29 08:56:25", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "pfnet-nekomata-14b-pfn-qfin-gguf pfnetさんが公開しているnekomata-14b-pfn-qfinのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/pfnet-nekomata-14b-pfn-qfin-gguf", "project_name": "pfnet-nekomata-14b-pfn-qfin-gguf", "downloads": 646, "source": "Hugging Face", "score": -0.04751891952455, "first_commit": "2024-04-24 12:58:10", "latest_commit": "2024-04-24 14:46:15", "languages": [], "model_or_dataset": "model", "model_size": 14.2, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Model Card for Japanese DeBERTa V3 base Model description This is a Japanese DeBERTa V3 base model pre-trained on LLM-jp corpus v1.0.", "url": 
"https://huggingface.co/ku-nlp/deberta-v3-base-japanese", "project_name": "deberta-v3-base-japanese", "downloads": 643, "source": "Hugging Face", "score": -0.047540216901633535, "first_commit": "2024-04-23 05:08:21", "latest_commit": "2024-04-28 06:08:55", "languages": [], "model_or_dataset": "model", "model_size": 0.23600000000000002, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "japanese-large-lm-1.7b-instruction-sft This repository provides a 1.7B parameters Japanese language model, fine-tuned and trained by LINE Corporation.", "url": "https://huggingface.co/line-corporation/japanese-large-lm-1.7b-instruction-sft", "project_name": "japanese-large-lm-1.7b-instruction-sft", "downloads": 638, "source": "Hugging Face", "score": -0.04757571253010609, "first_commit": "2023-08-14 17:19:11", "latest_commit": "2023-08-14 17:19:11", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "KARAKURI LM KARAKURI LM is a pretrained language model that builds upon Llama 2.", "url": "https://huggingface.co/karakuri-ai/karakuri-lm-70b-chat-v0.1", "project_name": "karakuri-lm-70b-chat-v0.1", "downloads": 637, "source": "Hugging Face", "score": -0.0475828116558006, "first_commit": "2024-01-26 09:08:09", "latest_commit": "2024-05-07 09:00:17", "languages": [], "model_or_dataset": "model", "model_size": 69.2, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Multilinguality", "Language Models", "Semantic Text Processing" ] }, { "description": "ELYZA-japanese-Llama-2-13b-fast-gguf ELYZAさんが公開しているELYZA-japanese-Llama-2-13b-fastのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-13b-fast-gguf", "project_name": "ELYZA-japanese-Llama-2-13b-fast-gguf", "downloads": 634, "source": "Hugging Face", "score": -0.04760410903288413, "first_commit": "2023-12-27 10:40:52", "latest_commit": "2023-12-27 13:18:46", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "Parakeet TDT-CTC 0.6B (ja) | | parakeet-tdt_ctc-0.6b-ja is an ASR model that transcribes Japanese speech with Punctuations.", "url": "https://huggingface.co/nvidia/parakeet-tdt_ctc-0.6b-ja", "project_name": "parakeet-tdt_ctc-0.6b-ja", "downloads": 631, "source": "Hugging Face", "score": -0.047625406409967666, "first_commit": "2024-05-13 15:39:30", "latest_commit": "2024-05-17 17:20:17", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Generation", "Speech & Audio in NLP", "Multimodality" ] }, { "description": "Stockmark-13b-instruct Stockmark-13b-instruct is an instruction-tuned version of Stockmark-13b, a 13 billion parameter Japanese LLM.", "url": "https://huggingface.co/stockmark/stockmark-13b-instruct", "project_name": "stockmark-13b-instruct", "downloads": 630, "source": "Hugging Face", "score": -0.04763250553566218, "first_commit": "2023-11-08 16:56:34", "latest_commit": "2023-11-08 17:02:17", "languages": [], "model_or_dataset": "model", "model_size": 13.2, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "Ninja-v1-gguf Local-Novel-LLM-projectさんが公開しているNinja-v1のggufフォーマット変換版です。 ", "url": 
"https://huggingface.co/mmnga/Ninja-v1-gguf", "project_name": "Ninja-v1-gguf", "downloads": 628, "source": "Hugging Face", "score": -0.0476467037870512, "first_commit": "2024-05-03 14:03:22", "latest_commit": "2024-05-04 13:26:22", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "QuantFactory/Mistral-Nemo-Japanese-Instruct-2408-GGUF This is quantized version of cyberagent/Mistral-Nemo-Japanese-Instruct-2408 created using llama.cpp Original Model Card Mistral-Nemo-Japanese-Instruct-2408 Model Description", "url": "https://huggingface.co/QuantFactory/Mistral-Nemo-Japanese-Instruct-2408-GGUF", "project_name": "Mistral-Nemo-Japanese-Instruct-2408-GGUF", "downloads": 627, "source": "Hugging Face", "score": -0.047653802912745714, "first_commit": "2024-09-09 15:17:36", "latest_commit": "2024-09-09 16:27:10", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "🎈 FlexDreamHK FlexDreamHKはリークされたNovelAIモデルの入っていない、あるいはそのリスクを可能な限り低くしたモデルを目指して作成しました。 ", "url": "https://huggingface.co/den2nova/FlexDreamHK", "project_name": "FlexDreamHK", "downloads": 621, "source": "Hugging Face", "score": -0.04769639766691278, "first_commit": "2023-07-06 10:11:45", "latest_commit": "2023-07-29 04:21:29", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "luke-japanese-large-lite luke-japanese is the Japanese version of LUKE (Language Understanding with Knowledge-based Embeddings), a pre-trained knowledge-enhanced contextualized representation of words and entities.", "url": "https://huggingface.co/studio-ousia/luke-japanese-large-lite", "project_name": "luke-japanese-large-lite", "downloads": 618, "source": "Hugging Face", "score": -0.04771769504399631, "first_commit": "2022-11-07 14:26:40", "latest_commit": "2022-11-09 11:19:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeForMaskedLM", "multi_labels": [ "Representation Learning", "Knowledge Representation", "Semantic Text Processing" ] }, { "description": "This is the filtered Japanese subset of XL-Sum followed by PaLM 2 filters 15-gram overlap * code: https://gist.github.com/mkshing/d6371cbfdd50d4f352cee247fd4dd86a number of examples train: 4215 (before: 7113) validation: 758 (before: 889) test: 766 (before: 889)", "url": "https://huggingface.co/datasets/mkshing/xlsum_ja", "project_name": "xlsum_ja", "downloads": 616, "source": "Hugging Face", "score": -0.04773189329538534, "first_commit": "2023-06-16 04:15:41", "latest_commit": "2023-06-20 23:28:48", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "albert-base-japanese-v1 日本語事前学習済みALBERTモデルです", "url": "https://huggingface.co/ken11/albert-base-japanese-v1", "project_name": "albert-base-japanese-v1", "downloads": 615, "source": "Hugging Face", "score": -0.047738992421079846, "first_commit": "2021-12-19 17:07:14", "latest_commit": "2021-12-22 03:04:30", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "AlbertForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": 
"GitHub リポジトリ ids-cv/wrime で公開されているデータセットを利用しています。 ", "url": "https://huggingface.co/datasets/llm-book/wrime-sentiment", "project_name": "wrime-sentiment", "downloads": 615, "source": "Hugging Face", "score": -0.047738992421079846, "first_commit": "2023-07-29 06:38:26", "latest_commit": "2023-10-06 00:56:38", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Sentiment Analysis" ] }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-70b-NVE-instruct-hf", "project_name": "Swallow-70b-NVE-instruct-hf", "downloads": 610, "source": "Hugging Face", "score": -0.04777448804955241, "first_commit": "2023-12-13 03:56:30", "latest_commit": "2024-07-06 15:18:24", "languages": [], "model_or_dataset": "model", "model_size": 69.0, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Japanese-StableLM-Base-JAVocab-Beta-7B A cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-base-ja_vocab-beta-7b is a 7B-parameter decoder-only language model based on Llama-2-7b that has been fine-tuned on a diverse collection of Japanese data, with the intent of maximizing downstream performance on Japanese language tasks.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-base-ja_vocab-beta-7b", "project_name": "japanese-stablelm-base-ja_vocab-beta-7b", "downloads": 608, "source": "Hugging Face", "score": -0.04778868630094143, "first_commit": "2023-10-30 07:49:15", "latest_commit": "2023-12-19 06:45:58", "languages": [], "model_or_dataset": "model", "model_size": 6.88, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "aixsatoshi-Honyaku-13b-gguf aixsatoshiさんが公開しているHonyaku-13bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/aixsatoshi-Honyaku-13b-gguf", "project_name": "aixsatoshi-Honyaku-13b-gguf", "downloads": 604, "source": "Hugging Face", "score": -0.04781708280371947, "first_commit": "2024-05-19 08:07:15", "latest_commit": "2024-05-19 09:24:59", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Meta-Llama-3-8B-Instruct-gguf meta-llamaさんが公開しているMeta-Llama-3-8B-Instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Meta-Llama-3-8B-Instruct-gguf", "project_name": "Meta-Llama-3-8B-Instruct-gguf", "downloads": 602, "source": "Hugging Face", "score": -0.0478312810551085, "first_commit": "2024-05-12 07:18:00", "latest_commit": "2024-05-12 08:08:38", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "google/gemma-3-27b-it-qat-q4_0-unquantizedを日本語が多く含まれるimatrixを使って量子化したモデルですThis is a model that quantizes google/gemma-3-27b-it-qat-q4_0-unquantized using an imatrix that contains a lot of Japanese.https://huggingface.co/dahara1/imatrix-jpn-test).", "url": "https://huggingface.co/dahara1/gemma-3-27b-it-qat-japanese-imatrix", "project_name": "gemma-3-27b-it-qat-japanese-imatrix", "downloads": 596, "source": "Hugging 
Face", "score": -0.04787387580927556, "first_commit": "2025-04-19 16:35:07", "latest_commit": "2025-04-23 02:07:44", "languages": [], "model_or_dataset": "model", "model_size": 27.0, "model_architectures": null, "multi_labels": [ "Multilinguality", "Natural Language Interfaces" ] }, { "description": "Ruri: Japanese General Text Embeddings Ruri v3 is a general-purpose Japanese text embedding model built on top of ModernBERT-Ja.", "url": "https://huggingface.co/cl-nagoya/ruri-v3-130m", "project_name": "ruri-v3-130m", "downloads": 596, "source": "Hugging Face", "score": -0.04787387580927556, "first_commit": "2025-04-09 04:07:51", "latest_commit": "2025-04-17 09:38:35", "languages": [], "model_or_dataset": "model", "model_size": 0.132, "model_architectures": "ModernBertModel", "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "KARAKURI LM KARAKURI LM is a pretrained language model that builds upon Llama 2.", "url": "https://huggingface.co/karakuri-ai/karakuri-lm-70b-v0.1", "project_name": "karakuri-lm-70b-v0.1", "downloads": 595, "source": "Hugging Face", "score": -0.04788097493497007, "first_commit": "2024-01-26 10:49:53", "latest_commit": "2024-05-07 09:00:06", "languages": [], "model_or_dataset": "model", "model_size": 69.2, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Multilinguality", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese-StableLM-Instruct-Beta-7B A cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-instruct-beta-7b is a 7B-parameter decoder-only language model based on", "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-beta-7b", "project_name": "japanese-stablelm-instruct-beta-7b", "downloads": 593, "source": "Hugging Face", "score": -0.047895173186359094, "first_commit": "2023-10-30 07:47:09", "latest_commit": "2023-12-19 06:43:49", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "bert-base-japanese-v3-jnli 「大規模言語モデル入門」の第5章で紹介している(自然言語推論)のモデルです。 ", "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-jnli", "project_name": "bert-base-japanese-v3-jnli", "downloads": 591, "source": "Hugging Face", "score": -0.04790937143774812, "first_commit": "2023-06-12 14:15:16", "latest_commit": "2023-07-24 06:49:14", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Representation Learning", "Textual Inference", "Language Models", "Semantic Text Processing" ] }, { "description": "fineweb-2-edu-japanese の small_tokens の text カラムをユニコード正規化(NFKC)したものを fineweb-2-japanese-text-cleaner を使ってノイズ箇所を推論したRAWデータセットです。 ", "url": "https://huggingface.co/datasets/hotchpotch/fineweb-2-edu-japanese-noise-detect-raw", "project_name": "fineweb-2-edu-japanese-noise-detect-raw", "downloads": 590, "source": "Hugging Face", "score": -0.04791647056344263, "first_commit": "2025-02-19 23:08:15", "latest_commit": "2025-02-20 02:03:34", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Term Extraction" ] }, { "description": "recruit-jp/japanese-typo-detector-roberta-base モデルの概要 日本語の文章を入力すると各文字ごとに誤字脱字である確率を出力します 各ラベルの意味は以下の通りです id label meaning 
0 OK 誤字なし 1 deletion 1文字の抜け 2 insertion_a 余分な1文字の挿入 3 insertion_b 直前の文字列と一致する2文字以上の余分な文字の挿入 4 kanji-conversion_a 同一の読みを持つ漢字の入れ替え(誤変換) 5 kanji-conversion_b 近い読みを持つ漢字の入れ替え(誤変換) 6 substitution 1文字の入れ替え 7 transposition 隣接する2文字間の転置 8 others その他の入力誤り 誤り種類の詳細については学習データセットの元論文をご参照ください 日本語 Wikipedia の編集履歴に基づく 入力誤りデータセットと訂正システムの改良 その他、モデルの詳細については当社ブログ記事をご参照ください 誤字脱字検出モデルをHugging Face Hubに公開しました (Re", "url": "https://huggingface.co/recruit-jp/japanese-typo-detector-roberta-base", "project_name": "japanese-typo-detector-roberta-base", "downloads": 589, "source": "Hugging Face", "score": -0.04792356968913714, "first_commit": "2023-11-09 06:27:40", "latest_commit": "2023-12-21 03:07:31", "languages": [], "model_or_dataset": "model", "model_size": 0.0996, "model_architectures": "RobertaForTokenClassification", "multi_labels": [ "Language Models", "Semantic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-13b-NVE-hf", "project_name": "Swallow-13b-NVE-hf", "downloads": 589, "source": "Hugging Face", "score": -0.04792356968913714, "first_commit": "2024-01-30 11:39:05", "latest_commit": "2024-06-29 08:56:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "BERT Base Japanese for Irony", "url": "https://huggingface.co/kit-nlp/bert-base-japanese-sentiment-irony", "project_name": "bert-base-japanese-sentiment-irony", "downloads": 586, "source": "Hugging Face", "score": -0.04794486706622068, "first_commit": "2022-11-07 06:29:21", "latest_commit": "2022-11-08 04:23:27", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Language Models", "Stylistic Analysis", "Semantic Text Processing", "Sentiment Analysis" ] }, { "description": "llm-jp-3-440m-instruct3 LLM-jp-3 is the series of large language models developed by the Research and Development Center for Large Language Models at the National Institute of Informatics.", "url": "https://huggingface.co/llm-jp/llm-jp-3-440m-instruct3", "project_name": "llm-jp-3-440m-instruct3", "downloads": 584, "source": "Hugging Face", "score": -0.0479590653176097, "first_commit": "2025-01-31 01:18:21", "latest_commit": "2025-02-04 04:56:45", "languages": [], "model_or_dataset": "model", "model_size": 0.447, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "llm-jp-3-150m-instruct3 LLM-jp-3 is the series of large language models developed by the Research and Development Center for Large Language Models at the National Institute of Informatics.", "url": "https://huggingface.co/llm-jp/llm-jp-3-150m-instruct3", "project_name": "llm-jp-3-150m-instruct3", "downloads": 584, "source": "Hugging Face", "score": -0.0479590653176097, "first_commit": "2025-01-31 01:15:34", "latest_commit": "2025-02-04 04:55:04", "languages": [], "model_or_dataset": "model", "model_size": 0.152, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Swallow Our Swallow model has undergone continual 
"url": "https://huggingface.co/tokyotech-llm/Swallow-7b-plus-hf", "project_name": "Swallow-7b-plus-hf", "downloads": 584, "source": "Hugging Face", "score": -0.0479590653176097, "first_commit": "2024-02-29 11:28:52", "latest_commit": "2024-06-29 08:56:19", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Qwen2.5 Bakeneko 32B Instruct V2 GGUF (rinna/qwen2.5-bakeneko-32b-instruct-v2-gguf)", "url": "https://huggingface.co/rinna/qwen2.5-bakeneko-32b-instruct-v2-gguf", "project_name": "qwen2.5-bakeneko-32b-instruct-v2-gguf", "downloads": 582, "source": "Hugging Face", "score": -0.04797326356899872, "first_commit": "2025-03-16 16:13:04", "latest_commit": "2025-03-23 12:21:32", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "RakutenAI-2.0-mini-instruct-gguf A gguf-format conversion of RakutenAI-2.0-mini-instruct published by Rakuten. ", "url": "https://huggingface.co/mmnga/RakutenAI-2.0-mini-instruct-gguf", "project_name": "RakutenAI-2.0-mini-instruct-gguf", "downloads": 582, "source": "Hugging Face", "score": -0.04797326356899872, "first_commit": "2025-02-12 08:42:32", "latest_commit": "2025-02-12 09:12:53", "languages": [], "model_or_dataset": "model", "model_size": 1.53, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "ABEJA-QwQ32b-Reasoning-Japanese-v1.0 ABEJA-QwQ32b-Reasoning-Japanese-v1.0 is based on abeja/ABEJA-Qwen2.5-32b-Japanese-v0.1 (*)", "url": "https://huggingface.co/abeja/ABEJA-QwQ32b-Reasoning-Japanese-v1.0", "project_name": "ABEJA-QwQ32b-Reasoning-Japanese-v1.0", "downloads": 580, "source": "Hugging Face", "score": -0.047987461820387746, "first_commit": "2025-03-25 06:37:07", "latest_commit": "2025-04-16 12:55:30", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Reasoning" ] }, { "description": "llm-jp-13b-instruct-full-dolly_en-dolly_ja-ichikara_003_001-oasst_en-oasst_ja-v1.1", "url": "https://huggingface.co/llm-jp/llm-jp-13b-instruct-full-dolly_en-dolly_ja-ichikara_003_001-oasst_en-oasst_ja-v1.1", "project_name": "llm-jp-13b-instruct-full-dolly_en-dolly_ja-ichikara_003_001-oasst_en-oasst_ja-v1.1", "downloads": 577, "source": "Hugging Face", "score": -0.04800875919747128, "first_commit": "2024-01-29 12:52:31", "latest_commit": "2024-02-07 19:49:25", "languages": [], "model_or_dataset": "model", "model_size": 12.9, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "(English part follows Japanese one.", "url": "https://huggingface.co/tohoku-nlp/tohokunlp-bert-500m-sq4096-alpha", "project_name": "tohokunlp-bert-500m-sq4096-alpha", "downloads": 576, "source": "Hugging Face", "score": -0.04801585832316579, "first_commit": "2024-11-26 08:36:05", "latest_commit": "2024-11-29 06:18:24", "languages": [], "model_or_dataset": "model", "model_size": 0.581, "model_architectures": "LlamaEncForMaskedLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "hotchpotch/japanese-reranker-cross-encoder-base-v1 A series of rerankers (CrossEncoders) trained on Japanese. ", "url": "https://huggingface.co/hotchpotch/japanese-reranker-cross-encoder-base-v1", "project_name": "japanese-reranker-cross-encoder-base-v1", "downloads": 574, "source": "Hugging Face", "score": -0.04803005657455481, "first_commit": "2024-03-29 07:07:38", "latest_commit": "2024-04-01 02:39:31", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Language Models" ] }, { "description": "bilingual-gpt-neox-4b-instruction-ppo Overview This repository provides an English-Japanese bilingual GPT-NeoX model of 3.8 billion parameters.", "url": "https://huggingface.co/rinna/bilingual-gpt-neox-4b-instruction-ppo", "project_name": "bilingual-gpt-neox-4b-instruction-ppo", "downloads": 570, "source": "Hugging Face", "score": -0.048058453077332856, "first_commit": "2023-08-02 05:56:07", "latest_commit": "2024-07-20 08:05:14", "languages": [], "model_or_dataset": "model", "model_size": 3.95, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Multilinguality", "Dialogue Systems & Conversational Agents", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "This repository is publicly accessible, but you have to accept the conditions to access its files and content.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-alpha-7b", "project_name": "japanese-stablelm-instruct-alpha-7b", "downloads": 569, "source": "Hugging Face", "score": -0.04806555220302737, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "llm-jp-3-980m-instruct3 LLM-jp-3 is the series of large language models developed by the Research and Development Center for Large Language Models at the National Institute of Informatics.", "url": "https://huggingface.co/llm-jp/llm-jp-3-980m-instruct3", "project_name": "llm-jp-3-980m-instruct3", "downloads": 569, "source": "Hugging Face", "score": -0.04806555220302737, "first_commit": "2025-01-31 01:19:58", "latest_commit": "2025-02-04 04:57:22", "languages": [], "model_or_dataset": "model", "model_size": 0.99, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "luke-japanese luke-japanese is the Japanese version of LUKE (Language Understanding with Knowledge-based Embeddings), a pre-trained knowledge-enhanced contextualized representation of words and entities.", "url": "https://huggingface.co/studio-ousia/luke-japanese-base-lite", "project_name": "luke-japanese-base-lite", "downloads": 567, "source": "Hugging Face", "score": -0.04807975045441639, "first_commit": "2022-10-25 09:27:16", "latest_commit": "2022-11-09 15:22:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeForMaskedLM", "multi_labels": [ "Representation Learning", "Knowledge Representation", "Semantic Text Processing" ] }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-7b-instruct-v0.1", "project_name": "Swallow-7b-instruct-v0.1", "downloads": 565, "source": "Hugging Face", "score": -0.04809394870580541, "first_commit": "2024-03-04 08:46:03",
"latest_commit": "2024-07-06 15:18:14", "languages": [], "model_or_dataset": "model", "model_size": 6.83, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Japanese-StableLM-Instruct-JAVocab-Beta-7B A cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-instruct-ja_vocab-beta-7b is a 7B-parameter decoder-only language model based on japanese-stablelm-ja_vocab-beta-7b and further fine tuned on Databricks Dolly-15k, Anthropic HH, and other public data.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-ja_vocab-beta-7b", "project_name": "japanese-stablelm-instruct-ja_vocab-beta-7b", "downloads": 563, "source": "Hugging Face", "score": -0.04810814695719443, "first_commit": "2023-10-30 07:49:38", "latest_commit": "2023-12-19 06:46:01", "languages": [], "model_or_dataset": "model", "model_size": 6.88, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "Llama 3.1 Swallow - Built with Llama Llama 3.1 Swallow is a series of large language models (8B, 70B) that were built by continual pre-training on the Meta Llama 3.1 models.", "url": "https://huggingface.co/tokyotech-llm/Llama-3.1-Swallow-70B-v0.1", "project_name": "Llama-3.1-Swallow-70B-v0.1", "downloads": 563, "source": "Hugging Face", "score": -0.04810814695719443, "first_commit": "2024-09-17 01:18:25", "latest_commit": "2024-10-08 14:15:35", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "This is the information integration of erai-raws and myanimelist.", "url": "https://huggingface.co/datasets/deepghs/erairaws_infos", "project_name": "erairaws_infos", "downloads": 549, "source": "Hugging Face", "score": -0.04820753471691759, "first_commit": "2025-03-11 09:02:45", "latest_commit": "2025-03-11 12:18:35", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "rinna-llama-3-youko-70b-instruct-gguf rinnaさんが公開しているllama-3-youko-70b-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/rinna-llama-3-youko-70b-instruct-gguf", "project_name": "rinna-llama-3-youko-70b-instruct-gguf", "downloads": 546, "source": "Hugging Face", "score": -0.048228832094001126, "first_commit": "2024-07-27 09:04:09", "latest_commit": "2024-07-31 14:35:52", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "karakuri-lm-70b-chat-v0.1-gguf karakuri-aiさんが公開しているkarakuri-lm-70b-chat-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/karakuri-lm-70b-chat-v0.1-gguf", "project_name": "karakuri-lm-70b-chat-v0.1-gguf", "downloads": 545, "source": "Hugging Face", "score": -0.04823593121969564, "first_commit": "2024-02-01 10:42:18", "latest_commit": "2024-05-09 15:22:45", "languages": [], "model_or_dataset": "model", "model_size": 69.2, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "mt5_summarize_japanese (Japanese caption : 日本語の要約のモデル)", "url": "https://huggingface.co/tsmatz/mt5_summarize_japanese", "project_name": "mt5_summarize_japanese", 
"downloads": 542, "source": "Hugging Face", "score": -0.048257228596779174, "first_commit": "2022-11-26 10:51:27", "latest_commit": "2024-07-12 00:01:31", "languages": [], "model_or_dataset": "model", "model_size": 0.3, "model_architectures": "MT5ForConditionalGeneration", "multi_labels": [ "Information Extraction & Text Mining", "Summarization", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "qwq-bakeneko-32b-gguf rinnaさんが公開しているqwq-bakeneko-32bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/qwq-bakeneko-32b-gguf", "project_name": "qwq-bakeneko-32b-gguf", "downloads": 542, "source": "Hugging Face", "score": -0.048257228596779174, "first_commit": "2025-03-17 09:24:57", "latest_commit": "2025-03-17 14:51:16", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": null, "multi_labels": [] }, { "description": "stockmark/gpt-neox-japanese-1.4b This repository provides a GPT-NeoX based model with 1.4B parameters pre-trained on Japanese corpus of about 20B tokens.", "url": "https://huggingface.co/stockmark/gpt-neox-japanese-1.4b", "project_name": "gpt-neox-japanese-1.4b", "downloads": 542, "source": "Hugging Face", "score": -0.048257228596779174, "first_commit": "2023-08-06 07:37:38", "latest_commit": "2023-09-07 03:44:19", "languages": [], "model_or_dataset": "model", "model_size": 1.44, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "llm-jp-3-13b-instruct3-gguf llm-jpさんが公開しているllm-jp-3-13b-instruct3のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/llm-jp-3-13b-instruct3-gguf", "project_name": "llm-jp-3-13b-instruct3-gguf", "downloads": 541, "source": "Hugging Face", "score": -0.04826432772247368, "first_commit": "2025-02-05 10:28:34", "latest_commit": "2025-02-05 15:15:47", "languages": [], "model_or_dataset": "model", "model_size": 13.7, "model_architectures": null, "multi_labels": [] }, { "description": "Dataset.", "url": "https://huggingface.co/datasets/hpprc/jsick", "project_name": "jsick", "downloads": 541, "source": "Hugging Face", "score": -0.04826432772247368, "first_commit": "2023-04-08 16:02:06", "latest_commit": "2023-04-11 15:18:09", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "ABEJA-Qwen2.5-7b-Japanese-v0.1 ABEJA-Qwen2.5-7b-Japanese-v0.1はQwen/Qwen2.5-7B-Instructをベースに日本語の学習をしたモデルです。 ", "url": "https://huggingface.co/abeja/ABEJA-Qwen2.5-7b-Japanese-v0.1", "project_name": "ABEJA-Qwen2.5-7b-Japanese-v0.1", "downloads": 537, "source": "Hugging Face", "score": -0.04829272422525173, "first_commit": "2025-03-12 10:40:36", "latest_commit": "2025-04-16 10:37:11", "languages": [], "model_or_dataset": "model", "model_size": 7.62, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "nlp-waseda/roberta-base-japanese-with-auto-jumanpp Model description", "url": "https://huggingface.co/nlp-waseda/roberta-base-japanese-with-auto-jumanpp", "project_name": "roberta-base-japanese-with-auto-jumanpp", "downloads": 536, "source": "Hugging Face", "score": -0.04829982335094624, "first_commit": "2022-10-15 05:09:36", "latest_commit": "2022-10-21 10:57:40", "languages": [], "model_or_dataset": "model", "model_size": null, 
"model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "bert-base-japanese-wikipedia-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-base-japanese-wikipedia-ud-head", "project_name": "bert-base-japanese-wikipedia-ud-head", "downloads": 535, "source": "Hugging Face", "score": -0.04830692247664075, "first_commit": "2022-06-20 21:58:53", "latest_commit": "2023-03-04 20:16:55", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForQuestionAnswering", "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Semantic Text Processing" ] }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-13b-instruct-v0.1", "project_name": "Swallow-13b-instruct-v0.1", "downloads": 535, "source": "Hugging Face", "score": -0.04830692247664075, "first_commit": "2024-03-04 11:30:28", "latest_commit": "2024-06-29 09:00:15", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "llm-jp-3-980m LLM-jp-3 is the series of large language models developed by the Research and Development Center for Large Language Models at the National Institute of Informatics.", "url": "https://huggingface.co/llm-jp/llm-jp-3-980m", "project_name": "llm-jp-3-980m", "downloads": 534, "source": "Hugging Face", "score": -0.048314021602335264, "first_commit": "2025-01-27 04:37:47", "latest_commit": "2025-02-04 04:56:59", "languages": [], "model_or_dataset": "model", "model_size": 0.99, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "ElanMT ElanMT-BT-en-ja is a English to Japanese translation model developed by ELAN MITSUA Project / Abstract Engine.", "url": "https://huggingface.co/Mitsua/elan-mt-bt-en-ja", "project_name": "elan-mt-bt-en-ja", "downloads": 529, "source": "Hugging Face", "score": -0.04834951723080782, "first_commit": "2024-05-20 01:51:18", "latest_commit": "2024-05-20 01:53:38", "languages": [], "model_or_dataset": "model", "model_size": 0.0606, "model_architectures": "MarianMTModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-7b-NVE-hf", "project_name": "Swallow-7b-NVE-hf", "downloads": 524, "source": "Hugging Face", "score": -0.048385012859280374, "first_commit": "2023-11-30 09:02:26", "latest_commit": "2024-06-29 08:56:18", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "calm3-22b-RP-v2 GGUF版はこちら/Click here for the GGUF version また、こちらで本モデルのデモを公開しています。", "url": "https://huggingface.co/Aratako/calm3-22b-RP-v2", "project_name": "calm3-22b-RP-v2", "downloads": 520, "source": "Hugging Face", "score": -0.04841340936205842, "first_commit": 
"2024-09-12 11:29:23", "latest_commit": "2024-09-16 05:53:42", "languages": [], "model_or_dataset": "model", "model_size": 22.5, "model_architectures": "LlamaForCausalLM", "multi_labels": [] }, { "description": "Qwen1.5-110B-Chat-gguf Qwenさんが公開しているQwen1.5-110B-Chatのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Qwen1.5-110B-Chat-gguf", "project_name": "Qwen1.5-110B-Chat-gguf", "downloads": 518, "source": "Hugging Face", "score": -0.04842760761344744, "first_commit": "2024-04-27 19:35:48", "latest_commit": "2024-04-28 08:09:17", "languages": [], "model_or_dataset": "model", "model_size": 111.0, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "HODACHI-EZO-Common-T2-2B-gemma-2-it-gguf HODACHIさんが公開しているEZO-Common-T2-2B-gemma-2-itのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/HODACHI-EZO-Common-T2-2B-gemma-2-it-gguf", "project_name": "HODACHI-EZO-Common-T2-2B-gemma-2-it-gguf", "downloads": 516, "source": "Hugging Face", "score": -0.048441805864836464, "first_commit": "2024-08-01 17:32:31", "latest_commit": "2024-08-01 18:38:31", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-7b-NVE-instruct-hf", "project_name": "Swallow-7b-NVE-instruct-hf", "downloads": 514, "source": "Hugging Face", "score": -0.04845600411622549, "first_commit": "2023-12-07 02:08:59", "latest_commit": "2024-07-06 15:18:11", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Sentence BERT base Japanese model This repository contains a Sentence BERT base model for Japanese.", "url": "https://huggingface.co/colorfulscoop/sbert-base-ja", "project_name": "sbert-base-ja", "downloads": 512, "source": "Hugging Face", "score": -0.04847020236761451, "first_commit": "2021-08-01 04:12:28", "latest_commit": "2021-08-08 15:47:42", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertModel", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese-StableLM-Base-Beta-7B A cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-base-beta-7b is a 7B-parameter decoder-only language model based on Llama-2-7b that has been fine-tuned on a diverse collection of Japanese data, with the intent of maximizing downstream performance on Japanese language tasks.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-base-beta-7b", "project_name": "japanese-stablelm-base-beta-7b", "downloads": 512, "source": "Hugging Face", "score": -0.04847020236761451, "first_commit": "2023-10-30 07:43:36", "latest_commit": "2023-12-19 06:43:01", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "Japan Diverse Images Dataset Overview This dataset is a comprehensive collection of high-quality images capturing the diverse aspects of Japan, including urban 
landscapes, natural scenery, historical sites, contemporary art, everyday life, and culinary experiences.", "url": "https://huggingface.co/datasets/ThePioneer/japanese-photos", "project_name": "japanese-photos", "downloads": 505, "source": "Hugging Face", "score": -0.04851989624747609, "first_commit": "2024-07-26 14:59:08", "latest_commit": "2024-07-26 15:53:01", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Visual Data in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "pfnet-Llama3-Preferred-MedSwallow-70B-gguf pfnetさんが公開しているLlama3-Preferred-MedSwallow-70Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/pfnet-Llama3-Preferred-MedSwallow-70B-gguf", "project_name": "pfnet-Llama3-Preferred-MedSwallow-70B-gguf", "downloads": 503, "source": "Hugging Face", "score": -0.048534094498865116, "first_commit": "2024-07-18 15:45:16", "latest_commit": "2024-07-19 09:14:38", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ElanMT ElanMT-BT-ja-en is a Japanese to English translation model developed by ELAN MITSUA Project / Abstract Engine.", "url": "https://huggingface.co/Mitsua/elan-mt-bt-ja-en", "project_name": "elan-mt-bt-ja-en", "downloads": 501, "source": "Hugging Face", "score": -0.048548292750254136, "first_commit": "2024-05-20 01:56:12", "latest_commit": "2024-05-20 01:56:57", "languages": [], "model_or_dataset": "model", "model_size": 0.0606, "model_architectures": "MarianMTModel", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "PLaMo-13B-Instruct-NC Model Description PLaMo-13B-Instruct-NC is a noncommercial instruct fine-tuned model built upon the 8192 context length version of PLaMo-13B text generation model.", "url": "https://huggingface.co/pfnet/plamo-13b-instruct-nc", "project_name": "plamo-13b-instruct-nc", "downloads": 499, "source": "Hugging Face", "score": -0.04856249100164316, "first_commit": "2023-10-26 05:36:26", "latest_commit": "2024-01-25 07:46:45", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": "PlamoForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "HODACHI-Borea-Phi-3.5-mini-Instruct-Jp-gguf HODACHIさんが公開しているBorea-Phi-3.5-mini-Instruct-Jpのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/HODACHI-Borea-Phi-3.5-mini-Instruct-Jp-gguf", "project_name": "HODACHI-Borea-Phi-3.5-mini-Instruct-Jp-gguf", "downloads": 498, "source": "Hugging Face", "score": -0.04856959012733767, "first_commit": "2024-08-21 09:58:41", "latest_commit": "2024-08-21 11:08:38", "languages": [], "model_or_dataset": "model", "model_size": 3.82, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "JQaRA : Japanese Question Answering with Retrieval Augmentation - 検索拡張(RAG)評価のための日本語 Q&A データセット 高性能な LLM の台頭に伴い、LLM を用いた質疑応答のユースケースが増加しています。", "url": "https://huggingface.co/datasets/hotchpotch/JQaRA", "project_name": "JQaRA", "downloads": 490, "source": "Hugging Face", "score": -0.04862638313289376, "first_commit": "2024-03-03 01:58:34", "latest_commit": "2024-08-10 02:56:05", "languages": [], "model_or_dataset": "dataset", 
"model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Information Retrieval", "Question Answering" ] }, { "description": "Model Card for Japanese DeBERTa V2 large Model description This is a Japanese DeBERTa V2 large model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/ku-nlp/deberta-v2-large-japanese", "project_name": "deberta-v2-large-japanese", "downloads": 485, "source": "Hugging Face", "score": -0.048661878761366316, "first_commit": "2023-01-07 07:45:25", "latest_commit": "2023-05-12 14:10:35", "languages": [], "model_or_dataset": "model", "model_size": 0.373, "model_architectures": "DebertaV2ForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "QuantFactory/gemma-2-2b-jpn-it-GGUF", "url": "https://huggingface.co/QuantFactory/gemma-2-2b-jpn-it-GGUF", "project_name": "gemma-2-2b-jpn-it-GGUF", "downloads": 485, "source": "Hugging Face", "score": -0.048661878761366316, "first_commit": "2024-10-03 13:41:27", "latest_commit": "2024-10-03 14:13:21", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "A* CCG Parser with a Supertag and Dependency Factored Model", "url": "https://github.com/masashi-y/depccg", "project_name": "depccg", "stargazers_count": 95, "source": "GitHub", "score": -0.04868263750058559, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Tagging" ] }, { "description": "50k English-Japanese Parallel Corpus for Machine Translation Benchmark.", "url": "https://github.com/odashi/small_parallel_enja", "project_name": "small_parallel_enja", "stargazers_count": 95, "source": "GitHub", "score": -0.04868263750058559, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "おーぷん2ちゃんねるをクロールして作成した対話コーパス", "url": "https://github.com/1never/open2ch-dialogue-corpus", "project_name": "open2ch-dialogue-corpus", "stargazers_count": 95, "source": "GitHub", "score": -0.04868263750058559, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Kotoba-Whisper-v1.1 Kotoba-Whisper-v1.1 is a Japanese ASR model based on kotoba-tech/kotoba-whisper-v1.0, with additional postprocessing stacks integrated as pipeline.", "url": "https://huggingface.co/kotoba-tech/kotoba-whisper-v1.1", "project_name": "kotoba-whisper-v1.1", "downloads": 479, "source": "Hugging Face", "score": -0.048704473515533385, "first_commit": "2024-04-29 14:53:48", "latest_commit": "2024-05-08 15:34:40", "languages": [], "model_or_dataset": "model", "model_size": 0.756, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "[Llama-3.1-70B-EZO-1.1-it] Model Card モデル情報 / Model Information このモデルは、Meta AI の Llama 3.1 をベースに、日本語タスクでの性能を向上させるためにファインチューニングを行ったものです。", "url": "https://huggingface.co/HODACHI/Llama-3.1-70B-EZO-1.1-it", "project_name": "Llama-3.1-70B-EZO-1.1-it", 
"downloads": 479, "source": "Hugging Face", "score": -0.048704473515533385, "first_commit": "2024-07-29 01:35:35", "latest_commit": "2024-08-04 06:16:58", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "※llama.cpp Releases b3428(7/21)", "url": "https://huggingface.co/MCZK/EZO-Humanities-9B-gemma-2-it-GGUF", "project_name": "EZO-Humanities-9B-gemma-2-it-GGUF", "downloads": 474, "source": "Hugging Face", "score": -0.04873996914400594, "first_commit": "2024-07-10 22:02:03", "latest_commit": "2024-07-21 18:11:21", "languages": [], "model_or_dataset": "model", "model_size": 9.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "defamation_japanese_twitter Twitter日本語誹謗中傷検出データセット Dataset Summary SNSにおける誹謗中傷検出のためのデータセットです. ", "url": "https://huggingface.co/datasets/kubota/defamation-japanese-twitter", "project_name": "defamation-japanese-twitter", "downloads": 473, "source": "Hugging Face", "score": -0.048747068269700454, "first_commit": "2023-01-20 06:50:46", "latest_commit": "2023-02-06 18:26:10", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "このモデルはluke-japanese-baseをファインチューニングして、固有表現抽出(NER)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-base-finetuned-ner", "project_name": "luke-japanese-base-finetuned-ner", "downloads": 468, "source": "Hugging Face", "score": -0.04878256389817301, "first_commit": "2023-01-17 23:36:52", "latest_commit": "2023-05-12 00:36:17", "languages": [], "model_or_dataset": "model", "model_size": 0.279, "model_architectures": "LukeForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Named Entity Recognition" ] }, { "description": "About static quants of https://huggingface.co/lightblue/DeepSeek-R1-Distill-Qwen-7B-Japanese weighted/imatrix quants seem not to be available (by me) at this time.", "url": "https://huggingface.co/mradermacher/DeepSeek-R1-Distill-Qwen-7B-Japanese-GGUF", "project_name": "DeepSeek-R1-Distill-Qwen-7B-Japanese-GGUF", "downloads": 461, "source": "Hugging Face", "score": -0.048832257778034585, "first_commit": "2025-01-24 20:18:01", "latest_commit": "2025-01-29 19:33:18", "languages": [], "model_or_dataset": "model", "model_size": 7.62, "model_architectures": null, "multi_labels": [] }, { "description": "[Under Construction]", "url": "https://huggingface.co/datasets/bclavie/mmarco-japanese-hard-negatives", "project_name": "mmarco-japanese-hard-negatives", "downloads": 455, "source": "Hugging Face", "score": -0.048874852532201654, "first_commit": "2023-12-24 13:04:27", "latest_commit": "2023-12-24 18:52:04", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Information Retrieval", "Annotation and Dataset Development" ] }, { "description": "概要 Imatrixにはneody/imatrix_datasetを使用しました。 ", "url": "https://huggingface.co/neody/sarashina2.2-3b-instruct-v0.1-gguf", "project_name": "sarashina2.2-3b-instruct-v0.1-gguf", "downloads": 453, "source": "Hugging Face", "score": -0.048889050783590675, "first_commit": "2025-03-05 06:35:29", 
"latest_commit": "2025-03-05 07:54:02", "languages": [], "model_or_dataset": "model", "model_size": 3.36, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "llm-jp-3-150m LLM-jp-3 is the series of large language models developed by the Research and Development Center for Large Language Models at the National Institute of Informatics.", "url": "https://huggingface.co/llm-jp/llm-jp-3-150m", "project_name": "llm-jp-3-150m", "downloads": 451, "source": "Hugging Face", "score": -0.0489032490349797, "first_commit": "2025-01-27 04:26:44", "latest_commit": "2025-02-04 04:53:27", "languages": [], "model_or_dataset": "model", "model_size": 0.152, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "HODACHI-EZO-Common-9B-gemma-2-it-gguf HODACHIさんが公開しているEZO-Common-9B-gemma-2-itのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/HODACHI-EZO-Common-9B-gemma-2-it-gguf", "project_name": "HODACHI-EZO-Common-9B-gemma-2-it-gguf", "downloads": 451, "source": "Hugging Face", "score": -0.0489032490349797, "first_commit": "2024-07-15 15:42:39", "latest_commit": "2024-07-15 16:20:33", "languages": [], "model_or_dataset": "model", "model_size": 9.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "DataPilot様の Llama3-ArrowSE-8B-v0.3 をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Llama3-ArrowSE-8B-v0.3-GGUF", "project_name": "Llama3-ArrowSE-8B-v0.3-GGUF", "downloads": 449, "source": "Hugging Face", "score": -0.04891744728636872, "first_commit": "2024-07-07 07:53:32", "latest_commit": "2024-07-07 13:40:26", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "nlp-waseda/roberta-base-japanese Model description This is a Japanese RoBERTa base model pretrained on Japanese Wikipedia and the Japanese portion of CC-100.", "url": "https://huggingface.co/nlp-waseda/roberta-base-japanese", "project_name": "roberta-base-japanese", "downloads": 441, "source": "Hugging Face", "score": -0.04897424029192481, "first_commit": "2021-12-20 05:12:06", "latest_commit": "2022-10-21 14:46:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Tanuki-8x8B-dpo-v1.0-GGUF 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8x8B-dpo-v1.0のGGUF量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-GGUF", "project_name": "Tanuki-8x8B-dpo-v1.0-GGUF", "downloads": 432, "source": "Hugging Face", "score": -0.049038132423175416, "first_commit": "2024-08-14 18:48:45", "latest_commit": "2024-08-29 17:42:37", "languages": [], "model_or_dataset": "model", "model_size": 47.0, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "tokyotech-llm-Llama-3.1-Swallow-70B-Instruct-v0.1-gguf tokyotech-llmさんが公開しているLlama-3.1-Swallow-70B-Instruct-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/tokyotech-llm-Llama-3.1-Swallow-70B-Instruct-v0.1-gguf", "project_name": "tokyotech-llm-Llama-3.1-Swallow-70B-Instruct-v0.1-gguf", "downloads": 431, "source": "Hugging Face", "score": 
-0.04904523154886992, "first_commit": "2024-10-10 01:38:48", "latest_commit": "2024-10-10 09:34:20", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ModernBERT-Ja-70M This repository provides Japanese ModernBERT trained by SB Intuitions.", "url": "https://huggingface.co/sbintuitions/modernbert-ja-70m", "project_name": "modernbert-ja-70m", "downloads": 426, "source": "Hugging Face", "score": -0.04908072717734248, "first_commit": "2025-02-19 10:26:31", "latest_commit": "2025-02-20 02:58:40", "languages": [], "model_or_dataset": "model", "model_size": 0.0703, "model_architectures": "ModernBertForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "ascktgcc/Mistral-nemo-ja-rp-v0.2のGGUF版です。", "url": "https://huggingface.co/ascktgcc/Mistral-nemo-ja-rp-v0.2-GGUF", "project_name": "Mistral-nemo-ja-rp-v0.2-GGUF", "downloads": 426, "source": "Hugging Face", "score": -0.04908072717734248, "first_commit": "2024-10-25 06:16:53", "latest_commit": "2024-10-25 06:57:17", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": null, "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "高性能な日本語 SPLADE (Sparse Lexical and Expansion Model) モデルです。", "url": "https://huggingface.co/hotchpotch/japanese-splade-base-v1", "project_name": "japanese-splade-base-v1", "downloads": 426, "source": "Hugging Face", "score": -0.04908072717734248, "first_commit": "2024-10-06 19:37:14", "latest_commit": "2024-10-07 01:59:02", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Heron BLIP Japanese StableLM", "url": "https://huggingface.co/turing-motors/heron-chat-blip-ja-stablelm-base-7b-v1", "project_name": "heron-chat-blip-ja-stablelm-base-7b-v1", "downloads": 424, "source": "Hugging Face", "score": -0.049094925428731506, "first_commit": "2024-02-20 11:32:57", "latest_commit": "2024-02-27 13:57:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "VideoBlipForConditionalGeneration", "multi_labels": [ "Visual Data in NLP", "Language Models", "Multimodality" ] }, { "description": "datagemma-rag-27b-it-gguf googleさんが公開しているdatagemma-rag-27b-itのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/datagemma-rag-27b-it-gguf", "project_name": "datagemma-rag-27b-it-gguf", "downloads": 424, "source": "Hugging Face", "score": -0.049094925428731506, "first_commit": "2024-09-12 18:03:45", "latest_commit": "2024-09-12 19:57:32", "languages": [], "model_or_dataset": "model", "model_size": 27.2, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/OrionStarAI/Orion-14B-Base", "project_name": "Orion-14B-Base", "downloads": 421, "source": "Hugging Face", "score": -0.04911622280581504, "first_commit": "2024-01-16 06:07:42", "latest_commit": "2024-03-26 09:21:52", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "OrionForCausalLM", "multi_labels": [ "Multilinguality", "Language Models" ] }, { "description": "I'm constantly enhancing these model descriptions to 
provide you with the most relevant and comprehensive information japanese-stablelm-3b-4e1t-base - GGUF Model creator: stabilityai Original model: japanese-stablelm-3b-4e1t-base StableLM", "url": "https://huggingface.co/maddes8cht/stabilityai-japanese-stablelm-3b-4e1t-base-gguf", "project_name": "stabilityai-japanese-stablelm-3b-4e1t-base-gguf", "downloads": 417, "source": "Hugging Face", "score": -0.04914461930859308, "first_commit": "2023-11-16 10:23:21", "latest_commit": "2023-11-16 11:18:48", "languages": [], "model_or_dataset": "model", "model_size": 2.8, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "QuantFactory/shisa-7b-v1-GGUF This is quantized version of augmxnt/shisa-base-7b-v1 created using llama.cpp Model Description shisa-base-7b-v1 takes Mistral 7B and adds an additional 8B tokens of primarily Japanese pre-training.", "url": "https://huggingface.co/QuantFactory/shisa-7b-v1-GGUF", "project_name": "shisa-7b-v1-GGUF", "downloads": 417, "source": "Hugging Face", "score": -0.04914461930859308, "first_commit": "2024-06-14 01:44:05", "latest_commit": "2024-06-18 05:53:41", "languages": [], "model_or_dataset": "model", "model_size": 7.96, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "XLNet-japanese Model description This model requires Mecab and sentencepiece with XLNetTokenizer.", "url": "https://huggingface.co/hajime9652/xlnet-japanese", "project_name": "xlnet-japanese", "downloads": 414, "source": "Hugging Face", "score": -0.04916591668567662, "first_commit": "2021-04-01 03:12:11", "latest_commit": "2023-01-05 04:28:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "XLNetLMHeadModel", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "Model Card For gemma-2-2b-jpn-it-gguf Googleさんのgemma-2-2b-jpn-itを量子化したものたちです。 ", "url": "https://huggingface.co/alfredplpl/gemma-2-2b-jpn-it-gguf", "project_name": "gemma-2-2b-jpn-it-gguf", "downloads": 412, "source": "Hugging Face", "score": -0.04918011493706564, "first_commit": "2024-10-03 05:57:17", "latest_commit": "2024-10-03 10:02:29", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", "url": "https://huggingface.co/stabilityai/japanese-stable-clip-vit-l-16", "project_name": "japanese-stable-clip-vit-l-16", "downloads": 410, "source": "Hugging Face", "score": -0.049194313188454665, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 0.414, "model_architectures": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Natural Language Interfaces", "Ethical NLP", "Dialogue Systems & Conversational Agents" ] }, { "description": "Ninja-v1-128k-gguf Local-Novel-LLM-projectさんが公開しているNinja-v1-128kのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Ninja-v1-128k-gguf", "project_name": "Ninja-v1-128k-gguf", "downloads": 408, "source": "Hugging Face", "score": -0.049208511439843686, "first_commit": "2024-05-01 17:48:06", "latest_commit": "2024-05-04 13:25:20", "languages": [], "model_or_dataset":
"model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "llm-jp-3-3.7b", "url": "https://huggingface.co/llm-jp/llm-jp-3-3.7b", "project_name": "llm-jp-3-3.7b", "downloads": 403, "source": "Hugging Face", "score": -0.04924400706831624, "first_commit": "2024-09-23 12:25:32", "latest_commit": "2024-09-26 18:20:09", "languages": [], "model_or_dataset": "model", "model_size": 3.78, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "モデル説明 (model explanation) CoolJapanDiffusion 2.1.1とWaifuDiffusion 1.4 anime epoch2のマージ。", "url": "https://huggingface.co/ThePioneer/CoolerWaifuDiffusion", "project_name": "CoolerWaifuDiffusion", "downloads": 399, "source": "Hugging Face", "score": -0.04927240357109429, "first_commit": "2023-01-20 23:52:39", "latest_commit": "2023-01-22 19:16:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Explainability & Interpretability in NLP" ] }, { "description": "sbintuitions/sarashina2.2-1b-instruct-v0.1 Model Summary", "url": "https://huggingface.co/sbintuitions/sarashina2.2-1b-instruct-v0.1", "project_name": "sarashina2.2-1b-instruct-v0.1", "downloads": 386, "source": "Hugging Face", "score": -0.049364692205122934, "first_commit": "2025-02-26 02:08:37", "latest_commit": "2025-03-05 07:01:50", "languages": [], "model_or_dataset": "model", "model_size": 1.41, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models", "Explainability & Interpretability in NLP" ] }, { "description": "llm-japanese-dataset LLM構築用の日本語インストラクション(チャット)データセット 主に,英語で構築されたLLMモデルなどに対して,チャット(Instruction)応答タスクに関してLoRAなどでチューニングするために使用できます. 
", "url": "https://huggingface.co/datasets/izumi-lab/llm-japanese-dataset", "project_name": "llm-japanese-dataset", "downloads": 382, "source": "Hugging Face", "score": -0.049393088707900976, "first_commit": "2023-04-30 06:13:24", "latest_commit": "2024-01-18 13:42:50", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Semantic Text Processing" ] }, { "description": "Japanese-Starling-ChatV-7B-GGUF GGUF conversion of \"Japanese-Starling-ChatV-7B\" \"Japanese-Starling-ChatV-7B\" is a Japanese chat model built on top of \"chatntq-ja-7b-v1.0\", originally based on Mistral-7B-v0.1.", "url": "https://huggingface.co/TFMC/Japanese-Starling-ChatV-7B-GGUF", "project_name": "Japanese-Starling-ChatV-7B-GGUF", "downloads": 379, "source": "Hugging Face", "score": -0.04941438608498451, "first_commit": "2024-04-14 12:42:01", "latest_commit": "2024-04-20 01:23:10", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "luke-japanese luke-japanese is the Japanese version of LUKE (Language Understanding with Knowledge-based Embeddings), a pre-trained knowledge-enhanced contextualized representation of words and entities.", "url": "https://huggingface.co/studio-ousia/luke-japanese-base", "project_name": "luke-japanese-base", "downloads": 373, "source": "Hugging Face", "score": -0.04945698083915158, "first_commit": "2022-10-25 06:30:23", "latest_commit": "2022-11-09 15:23:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeForMaskedLM", "multi_labels": [ "Representation Learning", "Knowledge Representation", "Semantic Text Processing" ] }, { "description": "BERT small Japanese finance This is a BERT model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/bert-small-japanese", "project_name": "bert-small-japanese", "downloads": 371, "source": "Hugging Face", "score": -0.0494711790905406, "first_commit": "2021-10-04 13:09:36", "latest_commit": "2022-12-09 00:40:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "JIC-VQA Dataset Description Japanese Image Classification Visual Question Answering (JIC-VQA)", "url": "https://huggingface.co/datasets/line-corporation/JIC-VQA", "project_name": "JIC-VQA", "downloads": 369, "source": "Hugging Face", "score": -0.04948537734192963, "first_commit": "2025-02-28 05:49:54", "latest_commit": "2025-03-24 00:18:36", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Natural Language Interfaces", "Visual Data in NLP", "Information Retrieval", "Question Answering", "Text Classification", "Annotation and Dataset Development" ] }, { "description": "About static quants of https://huggingface.co/abeja/ABEJA-Qwen2.5-7b-Japanese-v0.1 weighted/imatrix quants are available at https://huggingface.co/mradermacher/ABEJA-Qwen2.5-7b-Japanese-v0.1-i1-GGUF Usage If you are unsure how to use GGUF files, refer to one of TheBloke's READMEs for more details, including on how to concatenate multi-part 
files.", "url": "https://huggingface.co/mradermacher/ABEJA-Qwen2.5-7b-Japanese-v0.1-GGUF", "project_name": "ABEJA-Qwen2.5-7b-Japanese-v0.1-GGUF", "downloads": 367, "source": "Hugging Face", "score": -0.04949957559331865, "first_commit": "2025-04-22 06:00:49", "latest_commit": "2025-04-22 07:10:48", "languages": [], "model_or_dataset": "model", "model_size": 7.62, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "llm-jp-corpus-v3のkakenサブセット中の日本語テキストを、Qwen/Qwen2.5-32B-Instructを用いて日本語から英語に翻訳したデータセットです。 ", "url": "https://huggingface.co/datasets/hpprc/kaken-translations-ja-en", "project_name": "kaken-translations-ja-en", "downloads": 365, "source": "Hugging Face", "score": -0.04951377384470767, "first_commit": "2024-11-29 13:02:31", "latest_commit": "2025-01-09 04:09:18", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "llm-book/aio-passages のデータセットに対して、llm-book/bert-base-japanese-v3-bpr-passage-encoder によるパッセージのバイナリベクトルが embeddings フィールドに追加されています。 ", "url": "https://huggingface.co/datasets/llm-book/aio-passages-bpr-bert-base-japanese-v3", "project_name": "aio-passages-bpr-bert-base-japanese-v3", "downloads": 364, "source": "Hugging Face", "score": -0.04952087297040218, "first_commit": "2023-06-06 08:24:36", "latest_commit": "2023-06-30 10:30:40", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "In this study, we introduce a new dataset, WRIME, for emotional intensity estimation.", "url": "https://huggingface.co/datasets/shunk031/wrime", "project_name": "wrime", "downloads": 364, "source": "Hugging Face", "score": -0.04952087297040218, "first_commit": "2023-01-12 10:43:54", "latest_commit": "2023-01-15 12:39:01", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "This dataset is a clarified version of the image, context, and question set included in the Japanese-Heron-Bench for the construction of the Japanese evaluation benchmark suite.", "url": "https://huggingface.co/datasets/Silviase/Japanese-Heron-Bench", "project_name": "Japanese-Heron-Bench", "downloads": 363, "source": "Hugging Face", "score": -0.0495279720960967, "first_commit": "2024-07-16 08:12:30", "latest_commit": "2024-07-28 12:33:15", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Visual Data in NLP", "Annotation and Dataset Development" ] }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-large-long", "project_name": "t5-large-long", "downloads": 361, "source": "Hugging Face", "score": -0.04954217034748572, "first_commit": "2023-04-26 08:33:12", "latest_commit": "2023-05-10 10:00:35", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Fine-tuned Japanese Whisper model for speech recognition using whisper-small 
Fine-tuned openai/whisper-small on Japanese using Common Voice, JVS and JSUT.", "url": "https://huggingface.co/Ivydata/whisper-small-japanese", "project_name": "whisper-small-japanese", "downloads": 359, "source": "Hugging Face", "score": -0.04955636859887474, "first_commit": "2023-05-19 10:42:27", "latest_commit": "2023-05-19 10:50:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [ "Language Models", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "karakuri-lm-32b-thinking-2501-exp-gguf karakuri-aiさんが公開しているkarakuri-lm-32b-thinking-2501-expのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/karakuri-lm-32b-thinking-2501-exp-gguf", "project_name": "karakuri-lm-32b-thinking-2501-exp-gguf", "downloads": 356, "source": "Hugging Face", "score": -0.04957766597595827, "first_commit": "2025-01-24 13:13:47", "latest_commit": "2025-01-24 18:46:06", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "QuantFactory/Llama3.1-ArrowSE-v0.4-GGUF This is quantized version of DataPilot/Llama3.1-ArrowSE-v0.4 created using llama.cpp Original Model Card 概要 このモデルはllama3.1-8B-instructをもとに日本語性能を高めることを目的にMergekit&ファインチューニングを用いて作成されました。 ", "url": "https://huggingface.co/QuantFactory/Llama3.1-ArrowSE-v0.4-GGUF", "project_name": "Llama3.1-ArrowSE-v0.4-GGUF", "downloads": 354, "source": "Hugging Face", "score": -0.04959186422734729, "first_commit": "2024-07-28 06:17:48", "latest_commit": "2024-07-28 06:57:40", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Umievo-itr012-Gleipnir-7B-GGUF", "url": "https://huggingface.co/QuantFactory/Umievo-itr012-Gleipnir-7B-GGUF", "project_name": "Umievo-itr012-Gleipnir-7B-GGUF", "downloads": 353, "source": "Hugging Face", "score": -0.04959896335304181, "first_commit": "2024-06-09 03:48:10", "latest_commit": "2024-06-09 13:12:32", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Tanuki-8B-dpo-v1.0-GPTQ-4bit 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8B-dpo-v1.0のGPTQ 4bit量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8B-dpo-v1.0-GPTQ-4bit", "project_name": "Tanuki-8B-dpo-v1.0-GPTQ-4bit", "downloads": 350, "source": "Hugging Face", "score": -0.04962026073012534, "first_commit": "2024-08-27 16:17:17", "latest_commit": "2024-09-03 09:29:10", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "recruit-jp/japanese-clip-vit-b-32-roberta-base Overview Developed by: Recruit Co.", "url": "https://huggingface.co/recruit-jp/japanese-clip-vit-b-32-roberta-base", "project_name": "japanese-clip-vit-b-32-roberta-base", "downloads": 349, "source": "Hugging Face", "score": -0.04962735985581985, "first_commit": "2023-12-20 06:06:12", "latest_commit": "2024-01-22 07:41:59", "languages": [], "model_or_dataset": "model", "model_size": 0.198, "model_architectures": "JapaneseCLIPModel", "multi_labels": [ "Representation Learning", "Visual Data in 
NLP", "Multimodality" ] }, { "description": "ELYZA-japanese-Llama-2-13b-fast-instruct-GGUF", "url": "https://huggingface.co/QuantFactory/ELYZA-japanese-Llama-2-13b-fast-instruct-GGUF", "project_name": "ELYZA-japanese-Llama-2-13b-fast-instruct-GGUF", "downloads": 348, "source": "Hugging Face", "score": -0.04963445898151436, "first_commit": "2024-07-05 05:56:09", "latest_commit": "2024-07-13 13:29:45", "languages": [], "model_or_dataset": "model", "model_size": 1.1, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Sakugabooru2025:", "url": "https://huggingface.co/datasets/trojblue/sakugabooru2025", "project_name": "sakugabooru2025", "downloads": 348, "source": "Hugging Face", "score": -0.04963445898151436, "first_commit": "2024-12-29 13:51:08", "latest_commit": "2024-12-30 06:21:48", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "llm-jp-3-8x13b-instruct3 LLM-jp-3 is the series of large language models developed by the Research and Development Center for Large Language Models at the National Institute of Informatics.", "url": "https://huggingface.co/llm-jp/llm-jp-3-8x13b-instruct3", "project_name": "llm-jp-3-8x13b-instruct3", "downloads": 345, "source": "Hugging Face", "score": -0.0496557563585979, "first_commit": "2025-03-05 17:26:30", "latest_commit": "2025-04-01 12:35:08", "languages": [], "model_or_dataset": "model", "model_size": 73.2, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "llm-jp-3-3.7b-instruct3 LLM-jp-3 is the series of large language models developed by the Research and Development Center for Large Language Models at the National Institute of Informatics.", "url": "https://huggingface.co/llm-jp/llm-jp-3-3.7b-instruct3", "project_name": "llm-jp-3-3.7b-instruct3", "downloads": 343, "source": "Hugging Face", "score": -0.04966995460998692, "first_commit": "2025-01-31 01:26:33", "latest_commit": "2025-02-04 04:58:04", "languages": [], "model_or_dataset": "model", "model_size": 3.78, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "hubert-base-asr", "url": "https://huggingface.co/TKU410410103/hubert-base-japanese-asr", "project_name": "hubert-base-japanese-asr", "downloads": 343, "source": "Hugging Face", "score": -0.04966995460998692, "first_commit": "2024-04-09 06:01:43", "latest_commit": "2024-04-14 13:20:43", "languages": [], "model_or_dataset": "model", "model_size": 0.0945, "model_architectures": "HubertForCTC", "multi_labels": [ "Speech Recognition", "Text Generation", "Language Models", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "llm-book/t5-base-long-livedoor-news-corpus 「大規模言語モデル入門」の第7章で紹介している要約生成のモデルです。 ", "url": "https://huggingface.co/llm-book/t5-base-long-livedoor-news-corpus", "project_name": "t5-base-long-livedoor-news-corpus", "downloads": 341, "source": "Hugging Face", "score": -0.049684152861375945, "first_commit": "2023-06-27 13:32:54", "latest_commit": "2023-07-25 13:10:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { 
"description": "Cross-Encoder for Natural Language Inference(NLI) for Japanese Considering the results of the JNLI evaluation result, we recommend using akiFQC/bert-base-japanese-v3_nli-jsnli-jnli-jsick for natural language inference in Japanese.", "url": "https://huggingface.co/akiFQC/bert-base-japanese-v3_nli-jsnli", "project_name": "bert-base-japanese-v3_nli-jsnli", "downloads": 339, "source": "Hugging Face", "score": -0.049698351112764966, "first_commit": "2024-04-11 05:38:09", "latest_commit": "2024-04-26 06:27:05", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Reasoning", "Textual Inference", "Language Models", "Semantic Text Processing" ] }, { "description": "rinna/japanese-gpt-neox-3.6b rinnaさんが公開しているjapanese-gpt-neox-3.6bのgguf変換版です。 ", "url": "https://huggingface.co/mmnga/rinna-japanese-gpt-neox-3.6b-gguf", "project_name": "rinna-japanese-gpt-neox-3.6b-gguf", "downloads": 338, "source": "Hugging Face", "score": -0.04970545023845947, "first_commit": "2023-09-02 18:46:08", "latest_commit": "2023-09-08 02:37:19", "languages": [], "model_or_dataset": "model", "model_size": 3.61, "model_architectures": null, "multi_labels": [ "Multilinguality", "Language Models", "Semantic Text Processing" ] }, { "description": "This is a BERT Base model for emotion analysis in Japanese additionally fine-tuned for emotion detection and classification.", "url": "https://huggingface.co/alter-wang/bert-base-japanese-emotion-lily", "project_name": "bert-base-japanese-emotion-lily", "downloads": 338, "source": "Hugging Face", "score": -0.04970545023845947, "first_commit": "2024-04-25 06:05:51", "latest_commit": "2024-06-17 01:44:16", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "読み込み方 from datasets import load_dataset dataset = load_dataset(\"YANS-official/senryu-test\", split=\"test\") 概要 川柳投稿サイトの『写真川柳』と『川柳投稿まるせん』のクロールデータ、および YANS 委員が作成したデータを含みます。 ", "url": "https://huggingface.co/datasets/YANS-official/senryu-test", "project_name": "senryu-test", "downloads": 338, "source": "Hugging Face", "score": -0.04970545023845947, "first_commit": "2024-09-03 15:02:47", "latest_commit": "2024-09-09 05:53:26", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Text Generation", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "google様の google/gemma-2-2b-jpn-it をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/gemma-2-2b-jpn-it-GGUF", "project_name": "gemma-2-2b-jpn-it-GGUF", "downloads": 333, "source": "Hugging Face", "score": -0.049740945866932035, "first_commit": "2024-10-03 07:21:06", "latest_commit": "2024-10-03 09:30:22", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "DeBERTa V2 base Japanese This is a DeBERTaV2 model pretrained on Japanese texts.", "url": "https://huggingface.co/izumi-lab/deberta-v2-base-japanese", "project_name": "deberta-v2-base-japanese", "downloads": 332, "source": "Hugging Face", "score": -0.04974804499262654, "first_commit": "2023-10-21 13:24:11", "latest_commit": "2024-07-19 03:07:57", "languages": 
[], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Anime with caption CC-0 dataset このデータセットはイラストに対する日本語キャプションを 倫理的に学習しやすくするためのデータセットです。 ", "url": "https://huggingface.co/datasets/alfredplpl/anime-with-caption-cc0", "project_name": "anime-with-caption-cc0", "downloads": 331, "source": "Hugging Face", "score": -0.049755144118321055, "first_commit": "2024-06-03 04:37:13", "latest_commit": "2024-06-03 05:49:20", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Captioning", "Text Generation", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Lux Japanese Speech Corpus 概要 Lux Japanese Speech Corpus は、オリジナルキャラクター「Lux (ルクス)」による日本語のテキスト読み上げ音声を収録したデータセットです。", "url": "https://huggingface.co/datasets/Lami/Lux-Japanese-Speech-Corpus", "project_name": "Lux-Japanese-Speech-Corpus", "downloads": 325, "source": "Hugging Face", "score": -0.049797738872488125, "first_commit": "2025-02-24 14:51:53", "latest_commit": "2025-02-24 18:02:56", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Mistral-Nemo-Instruct-2407-gguf mistralaiさんが公開しているMistral-Nemo-Instruct-2407のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Mistral-Nemo-Instruct-2407-gguf", "project_name": "Mistral-Nemo-Instruct-2407-gguf", "downloads": 318, "source": "Hugging Face", "score": -0.0498474327523497, "first_commit": "2024-07-22 13:28:13", "latest_commit": "2024-07-22 17:25:48", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "abc-multiple-choice Dataset abc-multiple-choice は、競技クイズの大会「abc」で使用された4択問題を元に作成された、多肢選択式の質問応答データセットです。 ", "url": "https://huggingface.co/datasets/tohoku-nlp/abc-multiple-choice", "project_name": "abc-multiple-choice", "downloads": 318, "source": "Hugging Face", "score": -0.0498474327523497, "first_commit": "2024-03-02 03:58:25", "latest_commit": "2024-03-12 07:32:13", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Llama3 Swallow - Built with Meta Llama 3", "url": "https://huggingface.co/tokyotech-llm/Llama-3-Swallow-70B-v0.1", "project_name": "Llama-3-Swallow-70B-v0.1", "downloads": 317, "source": "Hugging Face", "score": -0.049854531878044214, "first_commit": "2024-06-14 05:56:33", "latest_commit": "2024-07-01 06:24:32", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Wav2Vec2-Large-Japanese Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the Common Voice, JSUT, TEDxJP and some other data.", "url": "https://huggingface.co/NTQAI/wav2vec2-large-japanese", "project_name": "wav2vec2-large-japanese", "downloads": 314, "source": "Hugging Face", "score": -0.04987582925512775, "first_commit": "2021-07-05 02:44:40", "latest_commit": "2023-02-17 13:07:47", "languages": [], 
"model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "DeepSeek-R1-Distill-Qwen-7B-gguf deepseek-aiさんが公開しているDeepSeek-R1-Distill-Qwen-7Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/DeepSeek-R1-Distill-Qwen-7B-gguf", "project_name": "DeepSeek-R1-Distill-Qwen-7B-gguf", "downloads": 314, "source": "Hugging Face", "score": -0.04987582925512775, "first_commit": "2025-01-20 16:02:58", "latest_commit": "2025-01-20 18:26:22", "languages": [], "model_or_dataset": "model", "model_size": 7.62, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "Fugaku-LLM利用規約 この利用規約(以下「本規約」といいます)は、富士通株式会社、国立研究開発法人理化学研究所、国立大学法人東京工業大学、国立大学法人東北大学、株式会社サイバーエージェント、国立大学法人東海国立大学機構、及び株式会社Kotoba Technologies Japan (以下「開発者」といいます)による、スーパーコンピュータ「富岳」政策対応枠における大規模言語モデル分散並列学習手法の開発の成果物として公開する大規模言語モデル(以下「Fugaku-LLM」といいます)の利用に関する条件を定めるものです。", "url": "https://huggingface.co/Fugaku-LLM/Fugaku-LLM-13B-instruct", "project_name": "Fugaku-LLM-13B-instruct", "downloads": 314, "source": "Hugging Face", "score": -0.04987582925512775, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 13.2, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "HODACHI-Borea-Phi-3.5-mini-Instruct-Common-gguf HODACHIさんが公開しているBorea-Phi-3.5-mini-Instruct-Commonのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/HODACHI-Borea-Phi-3.5-mini-Instruct-Common-gguf", "project_name": "HODACHI-Borea-Phi-3.5-mini-Instruct-Common-gguf", "downloads": 312, "source": "Hugging Face", "score": -0.04989002750651677, "first_commit": "2024-08-21 10:33:58", "latest_commit": "2024-08-21 11:42:56", "languages": [], "model_or_dataset": "model", "model_size": 3.82, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "About static quants of https://huggingface.co/cyberagent/DeepSeek-R1-Distill-Qwen-14B-Japanese weighted/imatrix quants seem not to be available (by me) at this time.", "url": "https://huggingface.co/mradermacher/DeepSeek-R1-Distill-Qwen-14B-Japanese-GGUF", "project_name": "DeepSeek-R1-Distill-Qwen-14B-Japanese-GGUF", "downloads": 310, "source": "Hugging Face", "score": -0.04990422575790579, "first_commit": "2025-02-11 11:30:53", "latest_commit": "2025-02-11 11:46:16", "languages": [], "model_or_dataset": "model", "model_size": 14.8, "model_architectures": null, "multi_labels": [] }, { "description": "SpeechT5 (TTS task) for Japanese SpeechT5 model fine-tuned for Japanese speech synthesis (text-to-speech)", "url": "https://huggingface.co/esnya/japanese_speecht5_tts", "project_name": "japanese_speecht5_tts", "downloads": 309, "source": "Hugging Face", "score": -0.049911324883600304, "first_commit": "2023-08-08 18:37:40", "latest_commit": "2023-08-09 09:25:38", "languages": [], "model_or_dataset": "model", "model_size": 0.14400000000000002, "model_architectures": "SpeechT5ForTextToSpeech", "multi_labels": [ "Representation Learning", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "RoBERTa base Japanese - JaQuAD Description A Japanese Question Answering model fine-tuned on JaQuAD.", "url": 
"https://huggingface.co/ybelkada/japanese-roberta-question-answering", "project_name": "japanese-roberta-question-answering", "downloads": 302, "source": "Hugging Face", "score": -0.04996101876346188, "first_commit": "2022-04-08 08:52:22", "latest_commit": "2022-04-08 11:38:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForQuestionAnswering", "multi_labels": [ "Natural Language Interfaces", "Question Answering", "Language Models", "Semantic Text Processing" ] }, { "description": "range3/cc100-ja This dataset consists of parquet files from the cc100 dataset with only the Japanese language extracted and sharded.", "url": "https://huggingface.co/datasets/range3/cc100-ja", "project_name": "cc100-ja", "downloads": 302, "source": "Hugging Face", "score": -0.04996101876346188, "first_commit": "2023-02-04 05:10:34", "latest_commit": "2023-02-04 05:43:32", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "このデータセットについて このデータは、日本の官公庁のWebサイトに掲載されている「よくある質問」を手作業で抽出し、インストラクション用のデータセットとしたものです。 ", "url": "https://huggingface.co/datasets/matsuxr/JaGovFaqs-22k", "project_name": "JaGovFaqs-22k", "downloads": 301, "source": "Hugging Face", "score": -0.049968117889156394, "first_commit": "2023-12-31 13:58:41", "latest_commit": "2024-02-29 02:51:20", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "QwQ Bakeneko 32B (rinna/qwq-bakeneko-32b)", "url": "https://huggingface.co/rinna/qwq-bakeneko-32b", "project_name": "qwq-bakeneko-32b", "downloads": 299, "source": "Hugging Face", "score": -0.049982316140545414, "first_commit": "2025-03-12 17:21:35", "latest_commit": "2025-03-23 12:16:25", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "rinna/japanese-gpt-neox-3.6b-instruction-ppo rinnaさんが公開しているjapanese-gpt-neox-3.6b-instruction-ppoのgguf変換版です。 ", "url": "https://huggingface.co/mmnga/rinna-japanese-gpt-neox-3.6b-instruction-ppo-gguf", "project_name": "rinna-japanese-gpt-neox-3.6b-instruction-ppo-gguf", "downloads": 299, "source": "Hugging Face", "score": -0.049982316140545414, "first_commit": "2023-09-02 17:52:26", "latest_commit": "2023-09-08 02:39:00", "languages": [], "model_or_dataset": "model", "model_size": 3.61, "model_architectures": null, "multi_labels": [ "Multilinguality", "Language Models", "Semantic Text Processing" ] }, { "description": "tokyotech-llm様の Llama-3-Swallow-8B-Instruct-v0.1 をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Llama-3-Swallow-8B-Instruct-v0.1-GGUF", "project_name": "Llama-3-Swallow-8B-Instruct-v0.1-GGUF", "downloads": 299, "source": "Hugging Face", "score": -0.049982316140545414, "first_commit": "2024-07-01 11:45:22", "latest_commit": "2024-07-01 17:54:05", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "rinna様の rinna/gemma-2-baku-2b-it をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/gemma-2-baku-2b-it-GGUF", 
"project_name": "gemma-2-baku-2b-it-GGUF", "downloads": 296, "source": "Hugging Face", "score": -0.05000361351762895, "first_commit": "2024-10-05 01:55:49", "latest_commit": "2024-10-05 04:03:23", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "JA-VG-VQA-500 Dataset Description JA-VG-VQA-500 is a 500-sample subset of Japanese Visual Genome VQA dataset.", "url": "https://huggingface.co/datasets/SakanaAI/JA-VG-VQA-500", "project_name": "JA-VG-VQA-500", "downloads": 296, "source": "Hugging Face", "score": -0.05000361351762895, "first_commit": "2024-03-21 09:51:10", "latest_commit": "2024-05-14 04:11:31", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "AutoWikiQA 東工大が公開しているSwallow-MXを用いて、Wikipedia中のテキストを入力として「質問(query)」と「回答(answer)」を生成し、生成された質問と回答についてフィルタリングを行ったデータセットです。", "url": "https://huggingface.co/datasets/cl-nagoya/auto-wiki-qa", "project_name": "auto-wiki-qa", "downloads": 290, "source": "Hugging Face", "score": -0.05004620827179602, "first_commit": "2024-03-28 01:33:42", "latest_commit": "2024-04-20 12:17:33", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Retrieval", "Annotation and Dataset Development" ] }, { "description": "cyberagent-Mistral-Nemo-Japanese-Instruct-2408-gguf cyberagentさんが公開しているMistral-Nemo-Japanese-Instruct-2408のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/cyberagent-Mistral-Nemo-Japanese-Instruct-2408-gguf", "project_name": "cyberagent-Mistral-Nemo-Japanese-Instruct-2408-gguf", "downloads": 289, "source": "Hugging Face", "score": -0.05005330739749053, "first_commit": "2025-01-25 12:37:13", "latest_commit": "2025-01-27 12:36:56", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": null, "multi_labels": [] }, { "description": "Llama-3-EZO-VLM-1 Based on SakanaAI/Llama-3-EvoVLM-JP-v2, it has been enhanced for Japanese usage through additional pre-training and instruction tuning.", "url": "https://huggingface.co/HODACHI/Llama-3-EZO-VLM-1", "project_name": "Llama-3-EZO-VLM-1", "downloads": 289, "source": "Hugging Face", "score": -0.05005330739749053, "first_commit": "2024-08-03 17:15:09", "latest_commit": "2024-08-04 23:20:43", "languages": [], "model_or_dataset": "model", "model_size": 8.48, "model_architectures": "LlavaForConditionalGeneration", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "magpie-easy-math-instruction-88k-qwen2.5-bakeneko-32b-instruct rinna/qwen2.5-bakeneko-32b-instructを用いたMagpieで生成した合成Instructionデータセットです。 ", "url": "https://huggingface.co/datasets/Kendamarron/magpie-easy-math-instruction-88k-qwen2.5-bakeneko-32b-instruct", "project_name": "magpie-easy-math-instruction-88k-qwen2.5-bakeneko-32b-instruct", "downloads": 285, "source": "Hugging Face", "score": -0.05008170390026857, "first_commit": "2025-03-17 23:02:58", "latest_commit": "2025-03-20 15:35:40", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Numerical Reasoning", "Reasoning", "Language Models", "Low-Resource NLP" ] }, { "description": "オリジナルのサイトと同じものを使用しています。 ", "url": 
"https://huggingface.co/datasets/llm-book/llm-jp-eval", "project_name": "llm-jp-eval", "downloads": 283, "source": "Hugging Face", "score": -0.050095902151657594, "first_commit": "2024-06-19 10:31:57", "latest_commit": "2024-08-31 12:40:31", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Annotation and Dataset Development" ] }, { "description": "SakanaAI-EvoLLM-JP-v1-7B-gguf SakanaAIさんが公開しているEvoLLM-JP-v1-7Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/SakanaAI-EvoLLM-JP-v1-7B-gguf", "project_name": "SakanaAI-EvoLLM-JP-v1-7B-gguf", "downloads": 282, "source": "Hugging Face", "score": -0.05010300127735211, "first_commit": "2024-03-21 13:04:25", "latest_commit": "2024-03-21 14:41:04", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "About weighted/imatrix quants of https://huggingface.co/nk2t/Llama-3-8B-Instruct-japanese-nk2t-v0.2 static quants are available at https://huggingface.co/mradermacher/Llama-3-8B-Instruct-japanese-nk2t-v0.2-GGUF Usage", "url": "https://huggingface.co/mradermacher/Llama-3-8B-Instruct-japanese-nk2t-v0.2-i1-GGUF", "project_name": "Llama-3-8B-Instruct-japanese-nk2t-v0.2-i1-GGUF", "downloads": 281, "source": "Hugging Face", "score": -0.05011010040304662, "first_commit": "2025-01-02 00:37:22", "latest_commit": "2025-01-02 01:21:11", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "This is for (private) DEMO only.", "url": "https://huggingface.co/Bagus/wav2vec2-xlsr-japanese-speech-emotion-recognition", "project_name": "wav2vec2-xlsr-japanese-speech-emotion-recognition", "downloads": 280, "source": "Hugging Face", "score": -0.05011719952874113, "first_commit": "2021-09-22 04:10:36", "latest_commit": "2023-10-19 01:31:17", "languages": [], "model_or_dataset": "model", "model_size": 0.316, "model_architectures": "HubertForSequenceClassification", "multi_labels": [ "Responsible & Trustworthy NLP", "Ethical NLP" ] }, { "description": "Model Card for llm-jp-clip-vit-large-patch14 Model Details Japanese CLIP model trained with OpenCLIP on relaion2B-en-research-safe-japanese-translation, a Japanese translation of the English subset of ReLAION-5B (https://huggingface.co/datasets/laion/relaion2B-en-research-safe),", "url": "https://huggingface.co/llm-jp/llm-jp-clip-vit-large-patch14", "project_name": "llm-jp-clip-vit-large-patch14", "downloads": 277, "source": "Hugging Face", "score": -0.05013849690582466, "first_commit": "2024-12-27 12:39:35", "latest_commit": "2025-02-14 13:41:27", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "Converted from clu-ling/whisper-large-v2-japanese-5k-steps using CTranslate2.", "url": "https://huggingface.co/zh-plus/faster-whisper-large-v2-japanese-5k-steps", "project_name": "faster-whisper-large-v2-japanese-5k-steps", "downloads": 277, "source": "Hugging Face", "score": -0.05013849690582466, "first_commit": "2023-07-03 08:29:37", "latest_commit": "2023-07-03 18:42:31", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, 
"multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "Japanese to Korean translator Japanese to Korean translator model based on EncoderDecoderModel(bert-japanese+kogpt2)", "url": "https://huggingface.co/sappho192/aihub-ja-ko-translator", "project_name": "aihub-ja-ko-translator", "downloads": 277, "source": "Hugging Face", "score": -0.05013849690582466, "first_commit": "2024-02-05 00:51:47", "latest_commit": "2024-06-28 06:38:39", "languages": [], "model_or_dataset": "model", "model_size": 0.265, "model_architectures": "EncoderDecoderModel", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models", "Semantic Text Processing" ] }, { "description": "LMSYS-Chat-1M-Synth: Japanese/English Synthetic Conversation Dataset Derived from LMSYS-Chat-1M", "url": "https://huggingface.co/datasets/tokyotech-llm/lmsys-chat-1m-synth", "project_name": "lmsys-chat-1m-synth", "downloads": 277, "source": "Hugging Face", "score": -0.05013849690582466, "first_commit": "2024-10-24 01:29:56", "latest_commit": "2025-01-24 20:57:22", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Dialogue Systems & Conversational Agents", "Language Models" ] }, { "description": "About static quants of https://huggingface.co/abeja/Mixtral-8x7B-v0.1-japanese weighted/imatrix quants seem not to be available (by me) at this time.", "url": "https://huggingface.co/mradermacher/Mixtral-8x7B-v0.1-japanese-GGUF", "project_name": "Mixtral-8x7B-v0.1-japanese-GGUF", "downloads": 275, "source": "Hugging Face", "score": -0.050152695157213684, "first_commit": "2024-12-01 01:53:04", "latest_commit": "2024-12-01 07:23:20", "languages": [], "model_or_dataset": "model", "model_size": 46.9, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Synthetic-Japanese-Roleplay-SFW-DeepSeek-V3-0324-20k 概要 deepseek-ai/DeepSeek-V3-0324を用いて作成した、約20000件の日本語ロールプレイの対話を収録した合成データセットです。", "url": "https://huggingface.co/datasets/Aratako/Synthetic-Japanese-Roleplay-SFW-DeepSeek-V3-0324-20k", "project_name": "Synthetic-Japanese-Roleplay-SFW-DeepSeek-V3-0324-20k", "downloads": 274, "source": "Hugging Face", "score": -0.0501597942829082, "first_commit": "2025-04-22 15:52:54", "latest_commit": "2025-04-23 14:07:27", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "Dataset overview This is a dataset for Japanese natural language processing with multi-label annotations of research field labels for GitHub repositories in the NLP domain.", "url": "https://huggingface.co/datasets/taishi-i/awesome-japanese-nlp-multilabel-dataset", "project_name": "awesome-japanese-nlp-multilabel-dataset", "downloads": 274, "source": "Hugging Face", "score": -0.0501597942829082, "first_commit": "2025-02-11 15:50:35", "latest_commit": "2025-03-05 01:55:36", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Text Classification" ] }, { "description": "cyberagent-open-calm-3b-gguf cyberagentさんが公開しているopen-calm-3bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/cyberagent-open-calm-3b-gguf", "project_name": 
"cyberagent-open-calm-3b-gguf", "downloads": 269, "source": "Hugging Face", "score": -0.05019528991138075, "first_commit": "2023-08-21 10:20:13", "latest_commit": "2023-09-08 03:09:01", "languages": [], "model_or_dataset": "model", "model_size": 2.79, "model_architectures": null, "multi_labels": [ "Semantic Text Processing" ] }, { "description": "Deepreneur-blue-lizard-gguf Deepreneurさんが公開しているblue-lizardのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Deepreneur-blue-lizard-gguf", "project_name": "Deepreneur-blue-lizard-gguf", "downloads": 266, "source": "Hugging Face", "score": -0.05021658728846429, "first_commit": "2024-02-13 15:18:15", "latest_commit": "2024-02-13 16:26:26", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "DeepSeek-R1-Distill-Qwen-14B-gguf deepseek-aiさんが公開しているDeepSeek-R1-Distill-Qwen-14Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/DeepSeek-R1-Distill-Qwen-14B-gguf", "project_name": "DeepSeek-R1-Distill-Qwen-14B-gguf", "downloads": 265, "source": "Hugging Face", "score": -0.0502236864141588, "first_commit": "2025-01-20 16:38:19", "latest_commit": "2025-01-20 20:19:55", "languages": [], "model_or_dataset": "model", "model_size": 14.8, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "Stanza model for Japanese (ja)", "url": "https://huggingface.co/stanfordnlp/stanza-ja", "project_name": "stanza-ja", "downloads": 263, "source": "Hugging Face", "score": -0.05023788466554782, "first_commit": "2021-09-07 12:05:41", "latest_commit": "2024-07-31 05:09:43", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Named Entity Recognition" ] }, { "description": "概要 llm-jp-instructionsは人手により作成されたインストラクションデータセットです。 ", "url": "https://huggingface.co/datasets/llm-jp/llm-jp-instructions", "project_name": "llm-jp-instructions", "downloads": 263, "source": "Hugging Face", "score": -0.05023788466554782, "first_commit": "2025-03-07 05:14:21", "latest_commit": "2025-03-07 10:53:02", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Synthetic-JP-Preference-Dataset-Qwen2.5_72B-191k 概要 5種類のオープンモデルとQwen/Qwen2.5-72B-Instruct-GPTQ-Int8を使って作成した、190854件の日本語合成Preferenceデータセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Synthetic-JP-Preference-Dataset-Qwen2.5_72B-191k", "project_name": "Synthetic-JP-Preference-Dataset-Qwen2.5_72B-191k", "downloads": 258, "source": "Hugging Face", "score": -0.05027338029402038, "first_commit": "2025-02-02 07:15:38", "latest_commit": "2025-02-02 08:30:31", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Similarity", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "ELYZA-japanese-CodeLlama-7b Model Description ELYZA-japanese-CodeLlama-7b は、 Code Llamaをベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", "url": "https://huggingface.co/elyza/ELYZA-japanese-CodeLlama-7b-instruct", "project_name": "ELYZA-japanese-CodeLlama-7b-instruct", "downloads": 255, "source": "Hugging Face", "score": -0.05029467767110391, 
"first_commit": "2023-11-07 12:04:07", "latest_commit": "2023-11-17 05:01:00", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Dataset Dataset Summary This dataset is designed for automatic speech recognition (ASR) and translation tasks, enabling the conversion of Spanish speech into Japanese text.", "url": "https://huggingface.co/datasets/Marianoleiras/voxpopuli_es-ja", "project_name": "voxpopuli_es-ja", "downloads": 254, "source": "Hugging Face", "score": -0.050301776796798425, "first_commit": "2024-12-08 21:55:33", "latest_commit": "2025-01-13 15:16:49", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "japanese-wav2vec2-large-rs35kh", "url": "https://huggingface.co/reazon-research/japanese-wav2vec2-large-rs35kh", "project_name": "japanese-wav2vec2-large-rs35kh", "downloads": 253, "source": "Hugging Face", "score": -0.05030887592249293, "first_commit": "2024-11-29 14:38:26", "latest_commit": "2024-11-29 15:03:27", "languages": [], "model_or_dataset": "model", "model_size": 0.319, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "[Llama-3-EZO model card]", "url": "https://huggingface.co/HODACHI/Llama-3-EZO-8b-Common-it", "project_name": "Llama-3-EZO-8b-Common-it", "downloads": 250, "source": "Hugging Face", "score": -0.05033017329957647, "first_commit": "2024-07-13 06:42:31", "latest_commit": "2024-08-04 06:16:37", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Ruri: Japanese General Text Embeddings ⚠", "url": "https://huggingface.co/cl-nagoya/ruri-v3-pt-30m", "project_name": "ruri-v3-pt-30m", "downloads": 249, "source": "Hugging Face", "score": -0.05033727242527098, "first_commit": "2025-03-20 05:23:50", "latest_commit": "2025-04-18 05:31:38", "languages": [], "model_or_dataset": "model", "model_size": 0.0367, "model_architectures": "ModernBertModel", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Mistral-Large-Instruct-2407-gguf mistralaiさんが公開しているMistral-Large-Instruct-2407のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Mistral-Large-Instruct-2407-gguf", "project_name": "Mistral-Large-Instruct-2407-gguf", "downloads": 249, "source": "Hugging Face", "score": -0.05033727242527098, "first_commit": "2024-07-24 18:59:58", "latest_commit": "2024-07-26 12:21:45", "languages": [], "model_or_dataset": "model", "model_size": 123.0, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Uploaded model Developed by: nappa0326 License: apache-2.0 Finetuned from model : elyza/Llama-3-ELYZA-JP-8B このモデルはLlama-3-ELYZA-JP-8Bをこのデータセットを使ってファインチューニングしたものです。 ", "url": "https://huggingface.co/nappa0326/llama-3-elyza-jp-8b-ft-functioncalling-gguf", "project_name": "llama-3-elyza-jp-8b-ft-functioncalling-gguf", "downloads": 248, "source": "Hugging Face", "score": -0.050344371550965494, "first_commit": "2024-10-17 12:30:11", "latest_commit": "2025-01-31 11:59:42", 
"languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "This repository contains some GGUF quantizations of the merge of the VNTL LLaMA 3 8B qlora.", "url": "https://huggingface.co/lmg-anon/vntl-llama3-8b-gguf", "project_name": "vntl-llama3-8b-gguf", "downloads": 240, "source": "Hugging Face", "score": -0.050401164556521584, "first_commit": "2024-06-13 17:17:30", "latest_commit": "2024-06-15 17:33:02", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Low-Resource NLP" ] }, { "description": "Synthetic-JP-EN-Coding-Dataset-801k Magpieによって作成したコードSFTデータセットであるAratako/Synthetic-JP-EN-Coding-Dataset-Magpie-69kを元に、Evol-Instructのような手法を用いて複数のinstructionとresonseを生成し拡張して作成した、日英混合801262件のコードSFT用合成データセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Synthetic-JP-EN-Coding-Dataset-801k", "project_name": "Synthetic-JP-EN-Coding-Dataset-801k", "downloads": 240, "source": "Hugging Face", "score": -0.050401164556521584, "first_commit": "2024-07-14 14:04:30", "latest_commit": "2024-09-28 05:01:14", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "qwen2.5-bakeneko-32b-instruct-gguf rinnaさんが公開しているqwen2.5-bakeneko-32b-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/qwen2.5-bakeneko-32b-instruct-gguf", "project_name": "qwen2.5-bakeneko-32b-instruct-gguf", "downloads": 236, "source": "Hugging Face", "score": -0.050429561059299625, "first_commit": "2025-02-13 04:12:52", "latest_commit": "2025-02-13 10:30:03", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "ms_marco_japanese ms_marco の日本語翻訳データです。 ", "url": "https://huggingface.co/datasets/hotchpotch/ms_marco_japanese", "project_name": "ms_marco_japanese", "downloads": 236, "source": "Hugging Face", "score": -0.050429561059299625, "first_commit": "2024-02-16 23:37:22", "latest_commit": "2024-02-20 09:32:51", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-2-instruct-1_6b", "project_name": "japanese-stablelm-2-instruct-1_6b", "downloads": 235, "source": "Hugging Face", "score": -0.05043666018499414, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 1.64, "model_architectures": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Natural Language Interfaces", "Ethical NLP", "Dialogue Systems & Conversational Agents" ] }, { "description": "RakutenAI-2.0-8x7B-instruct-gguf Rakutenさんが公開しているRakutenAI-2.0-8x7B-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/RakutenAI-2.0-8x7B-instruct-gguf", "project_name": "RakutenAI-2.0-8x7B-instruct-gguf", "downloads": 233, "source": "Hugging Face", "score": -0.05045085843638316, "first_commit": "2025-02-12 07:58:47", "latest_commit": "2025-02-12 16:13:54", "languages": [], 
"model_or_dataset": "model", "model_size": 46.8, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "本ggufモデルについて about this gguf model gemma-2-2b-itを日本語が多く含まれる重要度行列(iMatrix)を使って量子化したgguf版です。", "url": "https://huggingface.co/dahara1/gemma-2-2b-it-gguf-japanese-imatrix", "project_name": "gemma-2-2b-it-gguf-japanese-imatrix", "downloads": 232, "source": "Hugging Face", "score": -0.050457957562077674, "first_commit": "2024-11-29 05:01:10", "latest_commit": "2024-11-29 12:27:03", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null, "multi_labels": [ "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "AKU-d_ms-0.5B-v0.1_dataset Overview このリポジトリは、私の開発しているAKUシリーズの1つ目となる、AKU-d_ms-0.5B-chat-v0.1の事前学習に使用したテキストデータを集めています。 ", "url": "https://huggingface.co/datasets/YukiTomita-CC/AKU-d_ms-0.5B-v0.1_dataset", "project_name": "AKU-d_ms-0.5B-v0.1_dataset", "downloads": 231, "source": "Hugging Face", "score": -0.05046505668777218, "first_commit": "2024-08-31 13:37:35", "latest_commit": "2024-09-15 12:33:54", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Dialogue Systems & Conversational Agents", "Annotation and Dataset Development" ] }, { "description": "Model Card for Model ID Original model elyza/ELYZA-japanese-Llama-2-7b-fast-instruct which is based on Meta's \"Llama 2\" and has undergone additional pre-training in Japanese, and thier original post-training and speed up tuning.", "url": "https://huggingface.co/dahara1/ELYZA-japanese-Llama-2-7b-fast-instruct-GPTQ", "project_name": "ELYZA-japanese-Llama-2-7b-fast-instruct-GPTQ", "downloads": 229, "source": "Hugging Face", "score": -0.05047925493916121, "first_commit": "2023-08-30 09:18:50", "latest_commit": "2023-11-14 00:10:58", "languages": [], "model_or_dataset": "model", "model_size": 1.24, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "Canary-TTS-0.5B sarashina2.2‑0.5b‑instruct‑v0.1 をベースに学習したTTSモデルです。 ", "url": "https://huggingface.co/2121-8/canary-tts-0.5b", "project_name": "canary-tts-0.5b", "downloads": 228, "source": "Hugging Face", "score": -0.050486354064855715, "first_commit": "2025-04-20 04:48:39", "latest_commit": "2025-04-23 06:59:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [] }, { "description": "AXCXEPT-phi-4-open-R1-Distill-EZOv1-gguf AXCXEPTさんが公開しているphi-4-open-R1-Distill-EZOv1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/AXCXEPT-phi-4-open-R1-Distill-EZOv1-gguf", "project_name": "AXCXEPT-phi-4-open-R1-Distill-EZOv1-gguf", "downloads": 227, "source": "Hugging Face", "score": -0.05049345319055023, "first_commit": "2025-01-27 12:31:36", "latest_commit": "2025-01-27 16:04:19", "languages": [], "model_or_dataset": "model", "model_size": 14.7, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "WRIME-fine-tuned BERT base Japanese This model is a Japanese BERTBASE fine-tuned on the WRIME dataset.", "url": "https://huggingface.co/patrickramos/bert-base-japanese-v2-wrime-fine-tune", "project_name": "bert-base-japanese-v2-wrime-fine-tune", "downloads": 227, "source": "Hugging Face", "score": -0.05049345319055023, "first_commit": "2022-05-22 
09:42:14", "latest_commit": "2023-03-22 08:11:34", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Language Models", "Emotion Analysis", "Semantic Text Processing", "Sentiment Analysis" ] }, { "description": "Additional pretrained BERT base Japanese finance This is a BERT model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/bert-base-japanese-fin-additional", "project_name": "bert-base-japanese-fin-additional", "downloads": 226, "source": "Hugging Face", "score": -0.05050055231624474, "first_commit": "2022-03-11 17:41:11", "latest_commit": "2022-12-09 00:40:25", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForPreTraining", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "DiffLlama-1B DiffLlama-1Bは、フルスクラッチで約100Bトークン事前学習を行った約1Bパラメータの大規模言語モデルです。", "url": "https://huggingface.co/kajuma/DiffLlama-1B", "project_name": "DiffLlama-1B", "downloads": 226, "source": "Hugging Face", "score": -0.05050055231624474, "first_commit": "2025-03-29 10:50:20", "latest_commit": "2025-03-30 02:43:10", "languages": [], "model_or_dataset": "model", "model_size": 1.39, "model_architectures": "DiffLlamaForCausalLM", "multi_labels": [] }, { "description": "Heron GIT Japanese StableLM", "url": "https://huggingface.co/turing-motors/heron-chat-git-ja-stablelm-base-7b-v1", "project_name": "heron-chat-git-ja-stablelm-base-7b-v1", "downloads": 226, "source": "Hugging Face", "score": -0.05050055231624474, "first_commit": "2024-03-29 09:09:32", "latest_commit": "2024-05-02 07:55:57", "languages": [], "model_or_dataset": "model", "model_size": 7.32, "model_architectures": "GitJapaneseStableLMAlphaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "シンプルずんだもんデータセット はじめに ずんだもんの設定が詰まったシンプルなデータセットです。 ", "url": "https://huggingface.co/datasets/alfredplpl/simple-zundamon", "project_name": "simple-zundamon", "downloads": 225, "source": "Hugging Face", "score": -0.05050765144193925, "first_commit": "2023-10-21 15:16:58", "latest_commit": "2023-10-21 16:10:17", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "以下のデータセットの中の256文字以下の行を抽出しマージしました。 ", "url": "https://huggingface.co/datasets/noname0202/merged-ja", "project_name": "merged-ja", "downloads": 225, "source": "Hugging Face", "score": -0.05050765144193925, "first_commit": "2024-12-17 08:21:18", "latest_commit": "2024-12-19 11:26:30", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "日本語Wikipedia中のテキストを元に言い換えを生成し、その言い換えを元にクエリと回答をLLMに生成させたデータセットです。 ", "url": "https://huggingface.co/datasets/hpprc/paraphrase-qa", "project_name": "paraphrase-qa", "downloads": 224, "source": "Hugging Face", "score": -0.050514750567633764, "first_commit": "2024-10-25 08:06:52", "latest_commit": "2025-02-11 14:59:45", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Shisa 7B Shisa 7B (shisa-7b-v1)", "url": 
"https://huggingface.co/augmxnt/shisa-7b-v1", "project_name": "shisa-7b-v1", "downloads": 222, "source": "Hugging Face", "score": -0.050528948819022784, "first_commit": "2023-11-27 17:55:31", "latest_commit": "2023-12-20 18:11:13", "languages": [], "model_or_dataset": "model", "model_size": 7.96, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Multilinguality", "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/OrionStarAI/Orion-14B-Chat-RAG", "project_name": "Orion-14B-Chat-RAG", "downloads": 222, "source": "Hugging Face", "score": -0.050528948819022784, "first_commit": "2024-01-16 12:19:08", "latest_commit": "2024-03-26 10:08:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "OrionForCausalLM", "multi_labels": [ "Multilinguality", "Language Models" ] }, { "description": "ODEX is an Open-Domain EXecution-based NL-to-Code generation data benchmark.", "url": "https://huggingface.co/datasets/neulab/odex", "project_name": "odex", "downloads": 220, "source": "Hugging Face", "score": -0.050543147070411805, "first_commit": "2023-01-06 14:30:00", "latest_commit": "2023-02-10 18:01:34", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Generation" ] }, { "description": "QwQ-32B-Preview-gguf Qwenさんが公開しているQwQ-32B-Previewのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/QwQ-32B-Preview-gguf", "project_name": "QwQ-32B-Preview-gguf", "downloads": 216, "source": "Hugging Face", "score": -0.05057154357318985, "first_commit": "2024-12-03 16:59:42", "latest_commit": "2024-12-16 17:23:37", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Tanuki-8B-dpo-v1.0-GGUF 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8B-dpo-v1.0-4kのGGUF量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8B-dpo-v1.0-4k-GGUF", "project_name": "Tanuki-8B-dpo-v1.0-4k-GGUF", "downloads": 215, "source": "Hugging Face", "score": -0.05057864269888437, "first_commit": "2024-08-16 12:39:31", "latest_commit": "2024-08-27 18:05:25", "languages": [], "model_or_dataset": "model", "model_size": 7.51, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Wav2Vec2-Large-XLSR-53-Japanese Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the Common Voice and Japanese speech corpus of Saruwatari-lab, University of Tokyo JSUT.", "url": "https://huggingface.co/vumichien/wav2vec2-large-xlsr-japanese", "project_name": "wav2vec2-large-xlsr-japanese", "downloads": 214, "source": "Hugging Face", "score": -0.050585741824578874, "first_commit": "2021-03-28 04:21:20", "latest_commit": "2023-02-08 00:15:23", "languages": [], "model_or_dataset": "model", "model_size": 0.318, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ "Representation Learning", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "matsuolab-weblab-10b-instruction-sft-gguf matsuo-labさんが公開しているweblab-10b-instruction-sftのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/matsuolab-weblab-10b-instruction-sft-gguf", "project_name": "matsuolab-weblab-10b-instruction-sft-gguf", "downloads": 214, 
"source": "Hugging Face", "score": -0.050585741824578874, "first_commit": "2023-08-21 11:22:48", "latest_commit": "2023-09-02 18:16:33", "languages": [], "model_or_dataset": "model", "model_size": 10.7, "model_architectures": null, "multi_labels": [] }, { "description": "Llama-3.1-70B-Japanese-Instruct-2407 Model Description This is a Japanese continually pre-trained model based on meta-llama/Meta-Llama-3.1-70B-Instruct.", "url": "https://huggingface.co/cyberagent/Llama-3.1-70B-Japanese-Instruct-2407", "project_name": "Llama-3.1-70B-Japanese-Instruct-2407", "downloads": 211, "source": "Hugging Face", "score": -0.05060703920166241, "first_commit": "2024-07-26 01:30:21", "latest_commit": "2024-07-26 02:30:17", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "About weighted/imatrix quants of https://huggingface.co/cyberagent/Mistral-Nemo-Japanese-Instruct-2408 static quants are available at https://huggingface.co/mradermacher/Mistral-Nemo-Japanese-Instruct-2408-GGUF Usage If you are unsure how to use GGUF files, refer to one of TheBloke's READMEs for more details, including on how to concatenate multi-part files.", "url": "https://huggingface.co/mradermacher/Mistral-Nemo-Japanese-Instruct-2408-i1-GGUF", "project_name": "Mistral-Nemo-Japanese-Instruct-2408-i1-GGUF", "downloads": 211, "source": "Hugging Face", "score": -0.05060703920166241, "first_commit": "2025-02-27 21:41:18", "latest_commit": "2025-03-01 16:41:40", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "lightblue-Karasu-Mixtral-8x22B-v0.1-gguf lightblueさんが公開しているKarasu-Mixtral-8x22B-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/lightblue-Karasu-Mixtral-8x22B-v0.1-gguf", "project_name": "lightblue-Karasu-Mixtral-8x22B-v0.1-gguf", "downloads": 211, "source": "Hugging Face", "score": -0.05060703920166241, "first_commit": "2024-05-07 12:53:56", "latest_commit": "2024-05-07 18:07:43", "languages": [], "model_or_dataset": "model", "model_size": 141.0, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "range3/wikipedia-ja-20230101", "url": "https://huggingface.co/datasets/range3/wikipedia-ja-20230101", "project_name": "wikipedia-ja-20230101", "downloads": 211, "source": "Hugging Face", "score": -0.05060703920166241, "first_commit": "2023-02-04 04:29:29", "latest_commit": "2023-02-04 05:44:41", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "RetrievaEmbedding-01: AMBER The AMBER (Adaptive Multitask Bilingual Embedding Representations) is a text embedding model trained by Retrieva, Inc.", "url": "https://huggingface.co/retrieva-jp/amber-base", "project_name": "amber-base", "downloads": 210, "source": "Hugging Face", "score": -0.05061413832735692, "first_commit": "2025-03-07 01:10:01", "latest_commit": "2025-03-31 09:07:23", "languages": [], "model_or_dataset": "model", "model_size": 0.132, "model_architectures": "ModernBertModel", "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { 
"description": "ELYZA-japanese-Llama-2-13b-fast-instruct-GGUF Original Model elyza/ELYZA-japanese-Llama-2-13b-fast-instruct Run with LlamaEdge LlamaEdge version: v0.2.8 and above Prompt template Prompt type: llama-2-chat Prompt string <s>[INST] <<SYS>> {{ system_prompt }} <</SYS>> {{ user_msg_1 }}", "url": "https://huggingface.co/second-state/ELYZA-japanese-Llama-2-13b-fast-instruct-GGUF", "project_name": "ELYZA-japanese-Llama-2-13b-fast-instruct-GGUF", "downloads": 210, "source": "Hugging Face", "score": -0.05061413832735692, "first_commit": "2024-01-06 03:33:53", "latest_commit": "2024-03-20 07:21:25", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "BERT for Sentiment Analysis of Japanese Twitter", "url": "https://huggingface.co/LoneWolfgang/bert-for-japanese-twitter-sentiment", "project_name": "bert-for-japanese-twitter-sentiment", "downloads": 210, "source": "Hugging Face", "score": -0.05061413832735692, "first_commit": "2024-05-13 10:19:52", "latest_commit": "2024-08-09 12:03:25", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "llm-jp-3-8x1.8b-instruct3 LLM-jp-3 is the series of large language models developed by the Research and Development Center for Large Language Models at the National Institute of Informatics.", "url": "https://huggingface.co/llm-jp/llm-jp-3-8x1.8b-instruct3", "project_name": "llm-jp-3-8x1.8b-instruct3", "downloads": 209, "source": "Hugging Face", "score": -0.05062123745305143, "first_commit": "2025-03-12 06:43:09", "latest_commit": "2025-04-01 03:31:54", "languages": [], "model_or_dataset": "model", "model_size": 9.27, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Allganize RAG Leaderboard とは Allganize RAG Leaderboard は、5つの業種ドメイン(金融、情報通信、製造、公共、流通・小売)において、日本語のRAGの性能評価を実施したものです。", "url": "https://huggingface.co/datasets/allganize/RAG-Evaluation-Dataset-JA", "project_name": "RAG-Evaluation-Dataset-JA", "downloads": 209, "source": "Hugging Face", "score": -0.05062123745305143, "first_commit": "2024-09-03 09:00:27", "latest_commit": "2024-09-13 00:53:44", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Generation" ] }, { "description": "QwQ-32B-gguf Qwenさんが公開しているQwQ-32Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/QwQ-32B-gguf", "project_name": "QwQ-32B-gguf", "downloads": 207, "source": "Hugging Face", "score": -0.05063543570444046, "first_commit": "2025-03-06 16:13:12", "latest_commit": "2025-03-07 00:14:47", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Our Models Vecteus Ninja-v1 Ninja-v1-NSFW Ninja-v1-128k Ninja-v1-NSFW-128k Model Card for VecTeus-v1.0 The Mistral-7B--based Large Language Model (LLM) is an noveldataset fine-tuned version of the Mistral-7B-v0.1 VecTeus has the following changes compared to Mistral-7B-v0.1.", "url": "https://huggingface.co/Local-Novel-LLM-project/Vecteus-v1", "project_name": "Vecteus-v1", "downloads": 205, "source": 
"Hugging Face", "score": -0.05064963395582948, "first_commit": "2024-05-01 02:08:01", "latest_commit": "2024-05-04 04:07:22", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Model Card for Japanese BART large Model description", "url": "https://huggingface.co/ku-nlp/bart-large-japanese", "project_name": "bart-large-japanese", "downloads": 204, "source": "Hugging Face", "score": -0.05065673308152399, "first_commit": "2023-05-09 07:44:59", "latest_commit": "2023-05-12 11:05:03", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MBartForConditionalGeneration", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "CC-MAIN-2019-30へようこそ 本データセットはCommonCrawlerと呼ばれるものから日本語のみを抽出したものです。 ", "url": "https://huggingface.co/datasets/cc-clean/CC-MAIN-2019-30", "project_name": "CC-MAIN-2019-30", "downloads": 204, "source": "Hugging Face", "score": -0.05065673308152399, "first_commit": "2024-12-22 08:01:41", "latest_commit": "2024-12-22 12:36:11", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "alabnii/jmedroberta-base-sentencepiece-vocab50000 Model description This is a Japanese RoBERTa base model pre-trained on academic articles in medical sciences collected by Japan Science and Technology Agency (JST).", "url": "https://huggingface.co/alabnii/jmedroberta-base-sentencepiece-vocab50000", "project_name": "jmedroberta-base-sentencepiece-vocab50000", "downloads": 202, "source": "Hugging Face", "score": -0.05067093133291301, "first_commit": "2022-12-22 17:22:14", "latest_commit": "2023-06-27 03:44:17", "languages": [], "model_or_dataset": "model", "model_size": 0.124, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Tanuki-8B-dpo-v1.0-GPTQ-8bit 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8B-dpo-v1.0のGPTQ 8bit量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8B-dpo-v1.0-GPTQ-8bit", "project_name": "Tanuki-8B-dpo-v1.0-GPTQ-8bit", "downloads": 202, "source": "Hugging Face", "score": -0.05067093133291301, "first_commit": "2024-08-27 17:32:47", "latest_commit": "2024-09-03 09:28:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Llama 3.3 Swallow - Built with Llama Llama 3.3 Swallow is a large language model (70B) that was built by continual pre-training on the Meta Llama 3.3 model.", "url": "https://huggingface.co/tokyotech-llm/Llama-3.3-Swallow-70B-v0.4", "project_name": "Llama-3.3-Swallow-70B-v0.4", "downloads": 201, "source": "Hugging Face", "score": -0.05067803045860752, "first_commit": "2025-02-17 11:42:28", "latest_commit": "2025-03-17 02:18:03", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Annotation and Dataset Development" ] }, { "description": "DeepSeek-R1-Distill-Qwen-32B-gguf deepseek-aiさんが公開しているDeepSeek-R1-Distill-Qwen-32Bのggufフォーマット変換版です。 ", 
"url": "https://huggingface.co/mmnga/DeepSeek-R1-Distill-Qwen-32B-gguf", "project_name": "DeepSeek-R1-Distill-Qwen-32B-gguf", "downloads": 201, "source": "Hugging Face", "score": -0.05067803045860752, "first_commit": "2025-01-20 16:54:59", "latest_commit": "2025-01-20 23:44:54", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "JaQuAD is developed to provide a SQuAD-like QA dataset in Japanese.", "url": "https://huggingface.co/datasets/SkelterLabsInc/JaQuAD", "project_name": "JaQuAD", "downloads": 198, "source": "Hugging Face", "score": -0.05069932783569105, "first_commit": "2022-01-26 01:34:38", "latest_commit": "2022-10-25 09:06:40", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Question Answering", "Annotation and Dataset Development" ] }, { "description": "Atotti/RakutenAI-2.0-mini-instruct-gguf 本リポジトリは、Rakuten/RakutenAI-2.0-mini-instruct をベースに、llama.cpp や text-generation-webui 等のツールで動作するように GGUF 形式に変換したモデルを提供します。", "url": "https://huggingface.co/Atotti/RakutenAI-2.0-mini-instruct-gguf", "project_name": "RakutenAI-2.0-mini-instruct-gguf", "downloads": 197, "source": "Hugging Face", "score": -0.05070642696138557, "first_commit": "2025-02-12 07:28:55", "latest_commit": "2025-02-12 08:53:42", "languages": [], "model_or_dataset": "model", "model_size": 1.53, "model_architectures": null, "multi_labels": [ "Dialogue Response Generation", "Text Generation" ] }, { "description": "◆QuinceMix \"Defacta\"ベースのマージモデルです。 ", "url": "https://huggingface.co/Hemlok/QuinceMix", "project_name": "QuinceMix", "downloads": 194, "source": "Hugging Face", "score": -0.0507277243384691, "first_commit": "2023-03-26 08:50:08", "latest_commit": "2023-03-26 12:33:28", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "SakanaAI-EvoLLM-JP-A-v1-7B-gguf SakanaAIさんが公開しているEvoLLM-JP-A-v1-7Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/SakanaAI-EvoLLM-JP-A-v1-7B-gguf", "project_name": "SakanaAI-EvoLLM-JP-A-v1-7B-gguf", "downloads": 193, "source": "Hugging Face", "score": -0.050734823464163616, "first_commit": "2024-03-21 13:25:41", "latest_commit": "2024-03-21 14:48:28", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "Sarashina2-Vision-14B Sarashina2-Vision-14B is a Japanese Large Vision Language Model trained by SB Intuitions.", "url": "https://huggingface.co/sbintuitions/sarashina2-vision-14b", "project_name": "sarashina2-vision-14b", "downloads": 192, "source": "Hugging Face", "score": -0.05074192258985812, "first_commit": "2025-03-09 21:02:43", "latest_commit": "2025-03-27 02:37:03", "languages": [], "model_or_dataset": "model", "model_size": 14.4, "model_architectures": "Sarashina2VisionForCausalLM", "multi_labels": [ "Visual Data in NLP", "Language Models", "Multimodality" ] }, { "description": "元のデータセットFineWeb2-HQ 元のデータセットは多言語で巨大なため、扱いやすい用に日本語データを約200GBだけ抽出したデータセットです wc 結果 1763269 38541549 5370473709 fineweb_jpn_Jpan_chunk_0.jsonl 1784158 37430170 5370514369 fineweb_jpn_Jpan_chunk_1.jsonl 1639554 40065129 
5370372344 fineweb_jpn_Jpan_chunk_10.jsonl 1575127 42167166 5370298354 fineweb_jpn_Jpan_chunk_11.jsonl 1686375 39225898 5370402506 fineweb_jpn_Jpan_chunk_12.jsonl 1786948 36456352 5370498572 fineweb_jpn_Jpan_chunk_13.jsonl 1700447 38657869 5370422377 fineweb_jpn_Jpan_chunk_14.jsonl 1649880 402340", "url": "https://huggingface.co/datasets/dahara1/FineWeb2-HQ-ja-20B", "project_name": "FineWeb2-HQ-ja-20B", "downloads": 190, "source": "Hugging Face", "score": -0.05075612084124714, "first_commit": "2025-04-21 03:57:35", "latest_commit": "2025-04-21 04:59:35", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "AXCXEPT-EZO-Qwen2.5-72B-Instruct-gguf AXCXEPTさんが公開しているEZO-Qwen2.5-72B-Instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/AXCXEPT-EZO-Qwen2.5-72B-Instruct-gguf", "project_name": "AXCXEPT-EZO-Qwen2.5-72B-Instruct-gguf", "downloads": 188, "source": "Hugging Face", "score": -0.05077031909263617, "first_commit": "2024-10-02 13:11:45", "latest_commit": "2024-10-03 01:03:35", "languages": [], "model_or_dataset": "model", "model_size": 72.7, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Shisa V2 Shisa V2 is a family of bilingual Japanese and English (JA/EN)", "url": "https://huggingface.co/shisa-ai/shisa-v2-llama3.3-70b", "project_name": "shisa-v2-llama3.3-70b", "downloads": 184, "source": "Hugging Face", "score": -0.05079871559541421, "first_commit": "2025-04-13 13:21:39", "latest_commit": "2025-04-16 13:25:07", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Multilinguality", "Language Models" ] }, { "description": "QuantFactory/ELYZA-japanese-Llama-2-7b-instruct-GGUF", "url": "https://huggingface.co/QuantFactory/ELYZA-japanese-Llama-2-7b-instruct-GGUF", "project_name": "ELYZA-japanese-Llama-2-7b-instruct-GGUF", "downloads": 184, "source": "Hugging Face", "score": -0.05079871559541421, "first_commit": "2024-11-29 11:31:41", "latest_commit": "2024-11-29 12:04:51", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Vietnamese-Japanese Parallel Corpus 🌟 If you find this project valuable, please consider starring our VNJPTranslate GitHub repo!", "url": "https://huggingface.co/datasets/haiFrHust/VNJPTranslate", "project_name": "VNJPTranslate", "downloads": 184, "source": "Hugging Face", "score": -0.05079871559541421, "first_commit": "2025-03-31 14:18:11", "latest_commit": "2025-04-02 13:49:34", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "Japanese Voice Dataset Combined This dataset combines multiple high-quality Japanese voice datasets to create a comprehensive collection of Japanese speech data.", "url": "https://huggingface.co/datasets/kadirnar/japanese-voice-combined", "project_name": "japanese-voice-combined", "downloads": 183, "source": "Hugging Face", "score": -0.050805814721108726, "first_commit": "2025-04-13 12:51:44", "latest_commit": "2025-04-13 14:22:59", "languages": [], "model_or_dataset": "dataset", 
"model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Japanese Parler-TTS Mini (β版) このリポジトリは、parler-tts/parler-tts-mini-v1を基に、日本語でのテキスト読み上げを可能にするよう再学習したモデルを公開しています。", "url": "https://huggingface.co/2121-8/japanese-parler-tts-mini-bate", "project_name": "japanese-parler-tts-mini-bate", "downloads": 182, "source": "Hugging Face", "score": -0.05081291384680324, "first_commit": "2024-11-19 04:29:18", "latest_commit": "2024-12-05 08:26:32", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ParlerTTSForConditionalGeneration", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "日本語医療固有表現抽出モデル 概要 ソーシャル・コンピューティング研究室さまより公開されているMedTxt-CRを用いて、alabniiさまより公開されているRoBERTaをfine-tuningした固有表現抽出モデルです。 ", "url": "https://huggingface.co/daisaku-s/medtxt_ner_roberta", "project_name": "medtxt_ner_roberta", "downloads": 182, "source": "Hugging Face", "score": -0.05081291384680324, "first_commit": "2023-02-13 10:48:22", "latest_commit": "2023-02-15 13:43:48", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "I'm constantly enhancing these model descriptions to provide you with the most relevant and comprehensive information japanese-stablelm-3b-4e1t-instruct - GGUF Model creator: stabilityai Original model: japanese-stablelm-3b-4e1t-instruct StableLM", "url": "https://huggingface.co/maddes8cht/stabilityai-japanese-stablelm-3b-4e1t-instruct-gguf", "project_name": "stabilityai-japanese-stablelm-3b-4e1t-instruct-gguf", "downloads": 181, "source": "Hugging Face", "score": -0.05082001297249775, "first_commit": "2023-11-16 10:25:20", "latest_commit": "2023-11-16 12:53:33", "languages": [], "model_or_dataset": "model", "model_size": 2.8, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Model Card for Japanese BART base Model description This is a Japanese BART base model pre-trained on Japanese Wikipedia.", "url": "https://huggingface.co/ku-nlp/bart-base-japanese", "project_name": "bart-base-japanese", "downloads": 181, "source": "Hugging Face", "score": -0.05082001297249775, "first_commit": "2023-05-09 07:00:51", "latest_commit": "2023-05-12 11:03:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MBartForConditionalGeneration", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Synthetic-JP-10-Turns-Roleplay-Dialogues-Nemotron-4-1k nvidia/Nemotron-4-340B-Instructを用いて作成した、約1000件・各10ターンの日本語ロールプレイの対話を収録した合成対話データセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Synthetic-JP-10-Turns-Roleplay-Dialogues-Nemotron-4-1k", "project_name": "Synthetic-JP-10-Turns-Roleplay-Dialogues-Nemotron-4-1k", "downloads": 180, "source": "Hugging Face", "score": -0.05082711209819226, "first_commit": "2024-07-03 13:21:22", "latest_commit": "2024-07-03 13:53:20", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "サヨ子 音声コーパス ダウンロード方法 データセットを圧縮したzipファイルを、gdriveに置いています。 ", "url": 
"https://huggingface.co/datasets/bandad/sayoko-tts-corpus", "project_name": "sayoko-tts-corpus", "downloads": 180, "source": "Hugging Face", "score": -0.05082711209819226, "first_commit": "2023-08-16 02:11:29", "latest_commit": "2023-08-16 05:42:35", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "DeepSeek-R1-Distill-Qwen-1.5B-gguf deepseek-aiさんが公開しているDeepSeek-R1-Distill-Qwen-1.5Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/DeepSeek-R1-Distill-Qwen-1.5B-gguf", "project_name": "DeepSeek-R1-Distill-Qwen-1.5B-gguf", "downloads": 179, "source": "Hugging Face", "score": -0.05083421122388677, "first_commit": "2025-01-20 16:02:40", "latest_commit": "2025-01-20 18:25:31", "languages": [], "model_or_dataset": "model", "model_size": 1.78, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "OcuteusのGGUF版です。 ", "url": "https://huggingface.co/Local-Novel-LLM-project/Ocuteus-v1-gguf", "project_name": "Ocuteus-v1-gguf", "downloads": 179, "source": "Hugging Face", "score": -0.05083421122388677, "first_commit": "2024-05-07 09:57:49", "latest_commit": "2024-05-10 06:18:35", "languages": [], "model_or_dataset": "model", "model_size": 0.312, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Model Card for Tanrei/GPTSAN-japanese General-purpose Swich transformer based Japanese language model GPTSAN has some unique features.", "url": "https://huggingface.co/Tanrei/GPTSAN-japanese", "project_name": "GPTSAN-japanese", "downloads": 178, "source": "Hugging Face", "score": -0.05084131034958128, "first_commit": "2023-01-06 05:41:12", "latest_commit": "2023-04-21 19:04:49", "languages": [], "model_or_dataset": "model", "model_size": 2.78, "model_architectures": "GPTSanJapaneseForConditionalGeneration", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Model Description", "url": "https://huggingface.co/knosing/japanese_ner_model", "project_name": "japanese_ner_model", "downloads": 178, "source": "Hugging Face", "score": -0.05084131034958128, "first_commit": "2024-05-08 06:15:37", "latest_commit": "2024-05-08 07:06:22", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "bert-base-japanese-jsnli This model is a fine-tuned version of cl-tohoku/bert-base-japanese-v2 on the JSNLI dataset.", "url": "https://huggingface.co/Formzu/bert-base-japanese-jsnli", "project_name": "bert-base-japanese-jsnli", "downloads": 177, "source": "Hugging Face", "score": -0.050848409475275795, "first_commit": "2022-10-14 07:50:13", "latest_commit": "2022-10-18 12:13:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Information Extraction & Text Mining", "Responsible & Trustworthy NLP", "Information Retrieval", "Text Classification", "Language Models", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "weblab-10b-instruction-sft-GPTQ Original model weblab-10b-instruction-sft which is a Japanese-centric multilingual GPT-NeoX model of 10 billion parameters 
created by matsuo-lab (Takeshi Kojima).", "url": "https://huggingface.co/dahara1/weblab-10b-instruction-sft-GPTQ", "project_name": "weblab-10b-instruction-sft-GPTQ", "downloads": 175, "source": "Hugging Face", "score": -0.050862607726664816, "first_commit": "2023-08-21 05:45:35", "latest_commit": "2023-11-14 00:24:22", "languages": [], "model_or_dataset": "model", "model_size": 1.86, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Multilinguality", "Language Models", "Semantic Text Processing" ] }, { "description": "Washi (a kind of traditional Japanese paper)", "url": "https://huggingface.co/datasets/systemk/washi", "project_name": "washi", "downloads": 173, "source": "Hugging Face", "score": -0.050876805978053836, "first_commit": "2024-02-13 01:17:22", "latest_commit": "2024-03-06 03:16:54", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "japanese-asr/whisper_transcriptions.reazon_speech_all without audio", "url": "https://huggingface.co/datasets/efwkjn/reazonspeech_mtl", "project_name": "reazonspeech_mtl", "downloads": 173, "source": "Hugging Face", "score": -0.050876805978053836, "first_commit": "2025-01-28 06:27:32", "latest_commit": "2025-01-28 06:27:32", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech Recognition", "Text Generation", "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "rinna/japanese-data2vec-audio-base Overview This is a Japanese data2vec Audio Base model trained by rinna Co.", "url": "https://huggingface.co/rinna/japanese-data2vec-audio-base", "project_name": "japanese-data2vec-audio-base", "downloads": 172, "source": "Hugging Face", "score": -0.05088390510374835, "first_commit": "2024-03-05 10:32:32", "latest_commit": "2024-07-22 08:12:56", "languages": [], "model_or_dataset": "model", "model_size": 0.0932, "model_architectures": "Data2VecAudioModel", "multi_labels": [ "Representation Learning", "Speech & Audio in NLP", "Multimodality" ] }, { "description": "Japanese corpus: web corpora such as mc4-ja were cleaned and then clustered into about 10,000 text clusters with an unsupervised model. It may be used for information-analysis purposes permitted under copyright law. Note that only some of the files have been converted to parquet; the file list is in the out folder. Please download with git lfs or similar.", "url": "https://huggingface.co/datasets/kanhatakeyama/japanese-corpus-categorized", "project_name": "japanese-corpus-categorized", "downloads": 172, "source": "Hugging Face", "score": -0.05088390510374835, "first_commit": "2024-09-25 23:18:11", "latest_commit": "2024-09-28 01:36:48", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "line-corporation/japanese-large-lm-3.6b A gguf conversion of japanese-large-lm-3.6b published by LINE Corporation.", "url": "https://huggingface.co/mmnga/line-corp-japanese-large-lm-3.6b-gguf", "project_name": "line-corp-japanese-large-lm-3.6b-gguf", "downloads": 171, "source": "Hugging Face", "score": -0.050891004229442864, "first_commit": "2023-09-02 18:18:41", "latest_commit": "2023-09-08 02:53:05", "languages": [], "model_or_dataset": "model", "model_size": 3.71, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "llm-japanese-dataset-vanilla A Japanese chat dataset for building LLMs: izumi-lab/llm-japanese-dataset with the Japanese-English translation datasets and similar subsets removed.", "url": "https://huggingface.co/datasets/izumi-lab/llm-japanese-dataset-vanilla", "project_name": "llm-japanese-dataset-vanilla", "downloads": 171, "source": "Hugging Face", "score": -0.050891004229442864, "first_commit": "2023-05-23 14:45:27", "latest_commit": "2024-02-17 16:17:18", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Semantic Text Processing" ] }, { "description": "AnswerCarefully Dataset Terms of use: this dataset is released, including for commercial use, with the aim of improving the safety of LLMs in Japanese and other languages.", "url": "https://huggingface.co/datasets/llm-jp/AnswerCarefully", "project_name": "AnswerCarefully", "downloads": 171, "source": "Hugging Face", "score": -0.050891004229442864, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "COMET-T5 ja Finetuned T5 on ATOMIC ja using a text-to-text language modeling objective.", "url": "https://huggingface.co/nlp-waseda/comet-t5-base-japanese", "project_name": "comet-t5-base-japanese", "downloads": 170, "source": "Hugging Face", "score": -0.05089810335513737, "first_commit": "2022-11-12 15:07:40", "latest_commit": "2023-02-08 09:26:55", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Dialogue Response Generation", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Llama-3.1-70B-Instruct-gguf A gguf-format conversion of Meta-Llama-3.1-70B-Instruct published by meta-llama.", "url": "https://huggingface.co/mmnga/Llama-3.1-70B-Instruct-gguf", "project_name": "Llama-3.1-70B-Instruct-gguf", "downloads": 170, "source": "Hugging Face", "score": -0.05089810335513737, "first_commit": "2024-07-23 17:25:23", "latest_commit": "2024-07-24 21:04:27", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Uses the dataset published in the GitHub repository stockmarkteam/ner-wikipedia-dataset.", "url": "https://huggingface.co/datasets/llm-book/ner-wikipedia-dataset", "project_name": "ner-wikipedia-dataset", "downloads": 169, "source": "Hugging Face", "score": -0.050905202480831885, "first_commit": "2023-04-15 10:43:21", "latest_commit": "2023-12-12 11:25:51", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Named Entity Recognition", "Annotation and Dataset Development" ] }, { "description": "nlp-waseda/roberta-large-japanese-seq512-with-auto-jumanpp Model description", "url": "https://huggingface.co/nlp-waseda/roberta-large-japanese-seq512-with-auto-jumanpp", "project_name": "roberta-large-japanese-seq512-with-auto-jumanpp", "downloads": 168, "source": "Hugging Face", "score": -0.05091230160652639, "first_commit": "2022-10-15 06:04:06", "latest_commit": "2022-10-21 15:56:38", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "GSM8K Japanese Slim A Japanese-translated version of openai/gsm8k, with the answers extracted from the
descriptions.", "url": "https://huggingface.co/datasets/p1atdev/gsm8k-ja-slim", "project_name": "gsm8k-ja-slim", "downloads": 168, "source": "Hugging Face", "score": -0.05091230160652639, "first_commit": "2025-02-07 05:23:06", "latest_commit": "2025-02-10 15:47:52", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "Dataset Summary JapaneseGoblin is a dump of en.touhouwiki.net wiki.", "url": "https://huggingface.co/datasets/RyokoExtra/JapaneseGoblin", "project_name": "JapaneseGoblin", "downloads": 165, "source": "Hugging Face", "score": -0.050933598983609926, "first_commit": "2023-08-04 16:13:14", "latest_commit": "2023-08-05 14:21:38", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Information Retrieval", "Text Classification", "Text Generation" ] }, { "description": "License:CreativeML Open RAIL-M Additional Copyright: sazyou_roukaku (TwitterID @sazyou_roukaku) as of June 25, 2023 このモデルは『CreativeML Open RAIL-M』でLicenseそのものに変更はありません。 ", "url": "https://huggingface.co/sazyou-roukaku/LittleStepMix", "project_name": "LittleStepMix", "downloads": 164, "source": "Hugging Face", "score": -0.05094069810930444, "first_commit": "2023-06-25 06:57:42", "latest_commit": "2023-07-04 10:47:46", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "DeepSeek-R1-Distill-Qwen-32B-Japanese-gguf cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese License MIT License 👉 DeepSeek-R1-Distill-Qwen-14B-Japanese-gguf こっちのがいいかも👉 mmnga/cyberagent-DeepSeek-R1-Distill-Qwen-32B-Japanese-gguf", "url": "https://huggingface.co/bluepen5805/DeepSeek-R1-Distill-Qwen-32B-Japanese-gguf", "project_name": "DeepSeek-R1-Distill-Qwen-32B-Japanese-gguf", "downloads": 162, "source": "Hugging Face", "score": -0.05095489636069346, "first_commit": "2025-01-27 09:18:44", "latest_commit": "2025-01-28 04:14:17", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "About weighted/imatrix quants of https://huggingface.co/TFMC/Japanese-Starling-ChatV-7B static quants are available at https://huggingface.co/mradermacher/Japanese-Starling-ChatV-7B-GGUF Usage", "url": "https://huggingface.co/mradermacher/Japanese-Starling-ChatV-7B-i1-GGUF", "project_name": "Japanese-Starling-ChatV-7B-i1-GGUF", "downloads": 162, "source": "Hugging Face", "score": -0.05095489636069346, "first_commit": "2025-01-24 06:16:21", "latest_commit": "2025-01-24 07:04:40", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Model Card For gemma-2-2b-jpn-it-gguf rinnaさんのgemma-2-baku-2b-itを量子化したものたちです。 ", "url": "https://huggingface.co/alfredplpl/gemma-2-baku-2b-it-gguf", "project_name": "gemma-2-baku-2b-it-gguf", "downloads": 162, "source": "Hugging Face", "score": -0.05095489636069346, "first_commit": "2024-10-03 09:49:40", "latest_commit": 
"2024-10-03 10:07:29", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null, "multi_labels": [ "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "shisa-base-7b-v1 shisa-base-7b-v1 takes Mistral 7B and adds an additional 8B tokens of primarily Japanese pre-training.", "url": "https://huggingface.co/augmxnt/shisa-base-7b-v1", "project_name": "shisa-base-7b-v1", "downloads": 161, "source": "Hugging Face", "score": -0.050961995486387975, "first_commit": "2023-11-19 09:44:36", "latest_commit": "2023-12-09 10:34:29", "languages": [], "model_or_dataset": "model", "model_size": 7.96, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "ABEJA-Qwen2.5-32b-Japanese-v0.1-gguf abejaさんが公開しているABEJA-Qwen2.5-32b-Japanese-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ABEJA-Qwen2.5-32b-Japanese-v0.1-gguf", "project_name": "ABEJA-Qwen2.5-32b-Japanese-v0.1-gguf", "downloads": 161, "source": "Hugging Face", "score": -0.050961995486387975, "first_commit": "2025-01-27 12:57:15", "latest_commit": "2025-01-27 20:31:34", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": null, "multi_labels": [] }, { "description": "This repository contains some GGUF quantizations of the merged VNTL LLaMA3 8B 202409 qlora model, created using a custom version of the VNTL dataset combined with the VNTL-Chat dataset.", "url": "https://huggingface.co/lmg-anon/vntl-llama3-8b-202409-gguf", "project_name": "vntl-llama3-8b-202409-gguf", "downloads": 161, "source": "Hugging Face", "score": -0.050961995486387975, "first_commit": "2024-09-25 15:04:57", "latest_commit": "2024-09-25 16:29:08", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "What’s this?", "url": "https://huggingface.co/globis-university/deberta-v3-japanese-base", "project_name": "deberta-v3-japanese-base", "downloads": 160, "source": "Hugging Face", "score": -0.05096909461208249, "first_commit": "2023-09-21 16:19:31", "latest_commit": "2024-07-05 05:49:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing", "Morphology" ] }, { "description": "Model Card For llm-jp-3-3.7b-instruct-gguf LLM-jpさんのllm-jp-3-3.7b-instructを量子化したものたちです。 ", "url": "https://huggingface.co/alfredplpl/llm-jp-3-3.7b-instruct-gguf", "project_name": "llm-jp-3-3.7b-instruct-gguf", "downloads": 160, "source": "Hugging Face", "score": -0.05096909461208249, "first_commit": "2024-09-28 04:01:28", "latest_commit": "2024-10-03 10:01:44", "languages": [], "model_or_dataset": "model", "model_size": 3.78, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "概要 oscar-corpus/OSCAR-2301の以下のjaのみを corpus-cleanerを使用してデータクリーニングを行なったデーセット群 Code Language # docs # words Content Length : ja Japanese 94,236,404 4,401,059,165 181.2 GB ただし以下のファイルは、クリーニングが成功していないため除外しています。 ", "url": "https://huggingface.co/datasets/ayousanz/OSCOR-2301-ja-cleaned", "project_name": "OSCOR-2301-ja-cleaned", "downloads": 160, "source": "Hugging Face", "score": -0.05096909461208249, "first_commit": "2024-05-08 
04:52:54", "latest_commit": "2024-05-09 03:16:36", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Ani-Bench-JP データセット概要 Ani-Bench-JP は、日本の人気アニメに関する知識を測定するためのベンチマーク用データセットです。", "url": "https://huggingface.co/datasets/umiyuki/Ani-Bench-JP", "project_name": "Ani-Bench-JP", "downloads": 160, "source": "Hugging Face", "score": -0.05096909461208249, "first_commit": "2025-03-28 05:19:50", "latest_commit": "2025-03-28 11:11:51", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "DataPilot/ArrowNeo-AME-4x3B-v0.1-MoE overview このモデルはAItuberの魂となることを目的にSB intuitionsのsarashina-2.2-instruct-v0.1をベースにUnsothとMergekit-MoEを用いて作られました。 ", "url": "https://huggingface.co/DataPilot/ArrowNeo-AME-4x3B-v0.1-MoE", "project_name": "ArrowNeo-AME-4x3B-v0.1-MoE", "downloads": 159, "source": "Hugging Face", "score": -0.050976193737776995, "first_commit": "2025-03-28 18:15:40", "latest_commit": "2025-03-30 04:39:15", "languages": [], "model_or_dataset": "model", "model_size": 7.76, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Finetuned Waseda RoBERTa to evaluate the generated answers on JTruthfulQA.", "url": "https://huggingface.co/nlp-waseda/roberta_jtruthfulqa", "project_name": "roberta_jtruthfulqa", "downloads": 159, "source": "Hugging Face", "score": -0.050976193737776995, "first_commit": "2023-12-06 01:33:02", "latest_commit": "2023-12-06 04:31:12", "languages": [], "model_or_dataset": "model", "model_size": 0.337, "model_architectures": "RobertaForSequenceClassification", "multi_labels": [ "Natural Language Interfaces", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "kurumi_flux_lora_v1.0(flux.1 系統) 本モデルは、flux1.", "url": "https://huggingface.co/Kotajiro/kurumi_flux", "project_name": "kurumi_flux", "downloads": 158, "source": "Hugging Face", "score": -0.05098329286347151, "first_commit": "2025-02-26 08:06:58", "latest_commit": "2025-04-12 02:47:16", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "bilingual-gpt-neox-4b-8k Overview Notice: This model requires transformers>=4.31.0 to work properly.", "url": "https://huggingface.co/rinna/bilingual-gpt-neox-4b-8k", "project_name": "bilingual-gpt-neox-4b-8k", "downloads": 158, "source": "Hugging Face", "score": -0.05098329286347151, "first_commit": "2023-07-31 02:34:21", "latest_commit": "2024-07-20 08:03:16", "languages": [], "model_or_dataset": "model", "model_size": 3.95, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Multilinguality", "Language Models", "Semantic Text Processing" ] }, { "description": "ELAINE-medllm - Build with Llama3-8B ELAINE (EngLish-jApanese-chINesE)-", "url": "https://huggingface.co/kenyano/Llama3-ELAINE-medLLM-instruct-8B_v0.1", "project_name": "Llama3-ELAINE-medLLM-instruct-8B_v0.1", "downloads": 157, "source": "Hugging Face", "score": -0.050990391989166016, "first_commit": "2025-03-23 11:34:56", "latest_commit": "2025-04-04 00:43:17", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ 
"Multilinguality", "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "評価スコアの再現性確保と SB Intuitions 修正版の公開用クローン ソース: aiishii/JEMHopQA on GitHub JEMHopQA JEMHopQA (Japanese Explainable Multi-hop Question Answering)は、回答導出ステップの情報付きの日本語の根拠情報付きマルチホップQAデータセットです。", "url": "https://huggingface.co/datasets/sbintuitions/JEMHopQA", "project_name": "JEMHopQA", "downloads": 157, "source": "Hugging Face", "score": -0.050990391989166016, "first_commit": "2024-06-25 16:26:47", "latest_commit": "2024-09-13 05:38:43", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Question Answering" ] }, { "description": "iterative-dpo-data-for-ORPO-iter3 概要 合成instructionデータであるAratako/Self-Instruct-Qwen2.5-72B-Instruct-60kを元に以下のような手順で作成した日本語Preferenceデータセットです。 ", "url": "https://huggingface.co/datasets/Aratako/iterative-dpo-data-for-ORPO-iter3", "project_name": "iterative-dpo-data-for-ORPO-iter3", "downloads": 156, "source": "Hugging Face", "score": -0.05099749111486053, "first_commit": "2024-12-15 16:03:35", "latest_commit": "2024-12-17 01:42:13", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Generation" ] }, { "description": "line-corporation/japanese-large-lm-3.6b-instruction-sft line-corporationさんが公開しているjapanese-large-lm-3.6b-instruction-sftのgguf変換版です。 ", "url": "https://huggingface.co/mmnga/line-corp-japanese-large-lm-3.6b-instruction-sft-gguf", "project_name": "line-corp-japanese-large-lm-3.6b-instruction-sft-gguf", "downloads": 155, "source": "Hugging Face", "score": -0.051004590240555044, "first_commit": "2023-09-02 18:01:40", "latest_commit": "2023-09-08 02:52:29", "languages": [], "model_or_dataset": "model", "model_size": 3.71, "model_architectures": null, "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "WabiSabi-V1-gguf Local-Novel-LLM-projectさんが公開しているWabiSabi-V1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/WabiSabi-V1-gguf", "project_name": "WabiSabi-V1-gguf", "downloads": 155, "source": "Hugging Face", "score": -0.051004590240555044, "first_commit": "2025-01-25 12:38:13", "latest_commit": "2025-01-25 14:24:12", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Model Trained Using AutoNLP Problem type: Binary Classification Model ID: 59362 Validation Metrics Loss: 0.13092292845249176 Accuracy: 0.9527127414314258 Precision: 0.9634070704982427 Recall: 0.9842171959602166 AUC: 0.9667289746092403 F1: 0.9737009564152002 Usage You can use cURL to access this model: $ curl -X POST -H \"Authorization: Bearer YOUR_API_KEY\" -H \"Content-Type: application/json\" -d '{\"inputs\": \"I love AutoNLP\"}' https://api-inference.huggingface.co/models/abhishek/autonlp-japanese-sentiment-5936", "url": "https://huggingface.co/abhishek/autonlp-japanese-sentiment-59362", "project_name": "autonlp-japanese-sentiment-59362", "downloads": 153, "source": "Hugging Face", "score": -0.051018788491944064, "first_commit": "2021-04-21 11:28:11", "latest_commit": "2021-05-18 22:55:03", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Information Extraction & Text Mining", 
"Information Retrieval", "Text Classification" ] }, { "description": "JaCWIR: Japanese Casual Web IR - 日本語情報検索評価のための小規模でカジュアルなWebタイトルと概要のデータセット 近年、大規模言語モデル(LLM)の台頭により、一般的な日本語を用いた自然な検索クエリで質問するユースケースが増えています。", "url": "https://huggingface.co/datasets/hotchpotch/JaCWIR", "project_name": "JaCWIR", "downloads": 153, "source": "Hugging Face", "score": -0.051018788491944064, "first_commit": "2024-03-23 05:57:58", "latest_commit": "2024-04-01 02:34:34", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Dialogue Systems & Conversational Agents" ] }, { "description": "Synthetic-JP-EN-Coding-Dataset-567k Magpieによって作成したコードSFTデータセットであるAratako/Synthetic-JP-EN-Coding-Dataset-Magpie-69kを元に、Evol-Instructのような手法を用いて複数のinstructionとresonseを生成し拡張して作成した、日英混合567077件のコードSFT用合成データセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Synthetic-JP-EN-Coding-Dataset-567k", "project_name": "Synthetic-JP-EN-Coding-Dataset-567k", "downloads": 153, "source": "Hugging Face", "score": -0.051018788491944064, "first_commit": "2024-07-14 14:04:30", "latest_commit": "2024-07-14 14:40:50", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "kanalizer 英単語から読みを推測するライブラリ、kanalizerのデータセット置き場。", "url": "https://huggingface.co/datasets/VOICEVOX/kanalizer-dataset", "project_name": "kanalizer-dataset", "downloads": 152, "source": "Hugging Face", "score": -0.05102588761763858, "first_commit": "2025-04-14 03:55:55", "latest_commit": "2025-04-20 13:34:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Jawiki Sentences Dataset このデータセットは、日本語版Wikipediaの記事を元に作成されました。", "url": "https://huggingface.co/datasets/tet550/jawiki_sentences", "project_name": "jawiki_sentences", "downloads": 152, "source": "Hugging Face", "score": -0.05102588761763858, "first_commit": "2023-07-16 00:14:53", "latest_commit": "2023-07-16 08:17:58", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "AXCXEPT-EZO-phi-4-v2_900-gguf AXCXEPTさんが公開しているEZO-phi-4-v2_900のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/AXCXEPT-EZO-phi-4-v2_900-gguf", "project_name": "AXCXEPT-EZO-phi-4-v2_900-gguf", "downloads": 151, "source": "Hugging Face", "score": -0.051032986743333085, "first_commit": "2025-01-22 22:50:55", "latest_commit": "2025-01-23 01:49:16", "languages": [], "model_or_dataset": "model", "model_size": 14.7, "model_architectures": null, "multi_labels": [] }, { "description": "J-ResearchCorpus Update: 2024/3/16言語処理学会第30回年次大会(NLP2024)を含む、論文 1,343 本のデータを追加 2024/2/25言語処理学会誌「自然言語処理」のうち CC-BY-4.0 で公開されている論文 360 本のデータを追加 概要 CC-BY-* ライセンスで公開されている日本語論文や学会誌等から抜粋した高品質なテキストのデータセットです。", "url": "https://huggingface.co/datasets/kunishou/J-ResearchCorpus", "project_name": "J-ResearchCorpus", "downloads": 151, "source": "Hugging Face", "score": -0.051032986743333085, "first_commit": "2024-02-12 14:03:42", "latest_commit": "2024-03-16 07:55:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, 
"multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "モデルについて Qwen/Qwen1.5-0.5Bを日英データ5Bトークンで継続事前学習したTokara-0.5B-v0.1にchat vectorで対話能力を加えたモデルになります。 ", "url": "https://huggingface.co/Kendamarron/Tokara-0.5B-Chat-v0.1", "project_name": "Tokara-0.5B-Chat-v0.1", "downloads": 150, "source": "Hugging Face", "score": -0.0510400858690276, "first_commit": "2024-05-06 15:47:55", "latest_commit": "2024-05-08 13:30:12", "languages": [], "model_or_dataset": "model", "model_size": 0.464, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Dataset Summary This is the Business Scene Dialogue (BSD) dataset, a Japanese-English parallel corpus containing written conversations in various business scenarios.", "url": "https://huggingface.co/datasets/ryo0634/bsd_ja_en", "project_name": "bsd_ja_en", "downloads": 150, "source": "Hugging Face", "score": -0.0510400858690276, "first_commit": "2022-01-25 16:35:02", "latest_commit": "2024-01-11 07:36:44", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Dialogue Systems & Conversational Agents", "Annotation and Dataset Development" ] }, { "description": "Summary This is a LLaMA 3 Youko qlora fine-tune, created using a new version of the VNTL dataset.", "url": "https://huggingface.co/lmg-anon/vntl-llama3-8b-v2-hf", "project_name": "vntl-llama3-8b-v2-hf", "downloads": 149, "source": "Hugging Face", "score": -0.05104718499472211, "first_commit": "2025-01-01 23:25:03", "latest_commit": "2025-01-13 22:29:59", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [] }, { "description": "Mitsua Japanese CLIP ViT-B-16 明示的な許諾を得たオプトインデータ、オープンライセンスデータ、パブリックドメインデータのみでトレーニングされた日本語/英語バイリンガルCLIP (Contrastive Language-Image Pre-training)モデルです。 ", "url": "https://huggingface.co/Mitsua/mitsua-japanese-clip-vit-b-16", "project_name": "mitsua-japanese-clip-vit-b-16", "downloads": 149, "source": "Hugging Face", "score": -0.05104718499472211, "first_commit": "2024-12-05 09:03:24", "latest_commit": "2024-12-09 01:07:11", "languages": [], "model_or_dataset": "model", "model_size": 0.221, "model_architectures": "MitsuaJapaneseCLIPModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "JaMARD: Japanese Mathematical Dataset with Assured Reasoning Description English / Japanese Overview JaMARD (Japanese Mathematical Dataset with Assured Reasoning Description) is a high-quality synthetic dataset for Japanese mathematical problems with chain-of-thought reasoning, where the correctness of synthetic instances is assured.", "url": "https://huggingface.co/datasets/elyza/JaMARD", "project_name": "JaMARD", "downloads": 149, "source": "Hugging Face", "score": -0.05104718499472211, "first_commit": "2025-03-09 14:57:33", "latest_commit": "2025-03-10 04:57:32", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Reasoning" ] }, { "description": "Llama-3.3-SuperSwallow-70B-Instruct-v0.1 This is a merge of pre-trained language models created using mergekit. 
", "url": "https://huggingface.co/nitky/Llama-3.3-SuperSwallow-70B-Instruct-v0.1", "project_name": "Llama-3.3-SuperSwallow-70B-Instruct-v0.1", "downloads": 146, "source": "Hugging Face", "score": -0.05106848237180564, "first_commit": "2024-12-22 12:55:08", "latest_commit": "2024-12-22 13:37:41", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "SakuraMixSeries 背景とキャラクタークオリティーを両立させたVAE内蔵型モデル Model with built-in VAE for both background and character quality 📄 ライセンス / License 修正 CreativeML OpenRAIL-M ライセンス / Modified CreativeML OpenRAIL-M license このモデルのクレジットを入れずに使用する Use the model without crediting the creator このモデルで生成した画像を商用利用する Sell images they generate このモデルを商用の画像生成サービスで利用する Run on services that generate images for money このモデルを使用したマージモデルを共有する Share merges using this model このモデル、またはこのモデルをマージしたモデルを販売する Sell this model or merges using this model このモデ", "url": "https://huggingface.co/natsusakiyomi/SakuraMix", "project_name": "SakuraMix", "downloads": 146, "source": "Hugging Face", "score": -0.05106848237180564, "first_commit": "2023-03-17 17:37:21", "latest_commit": "2023-08-22 12:30:44", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Dataset Description", "url": "https://huggingface.co/datasets/NekoFi/whisper_toku", "project_name": "whisper_toku", "downloads": 145, "source": "Hugging Face", "score": -0.051075581497500154, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Fine-tuned XLSR-53 large model for speech diarization in Japanese phone-call 2 speakers diarization model which was fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using phone-call data CallHome.", "url": "https://huggingface.co/Ivydata/wav2vec2-large-speech-diarization-jp", "project_name": "wav2vec2-large-speech-diarization-jp", "downloads": 143, "source": "Hugging Face", "score": -0.051089779748889175, "first_commit": "2023-05-08 10:10:43", "latest_commit": "2023-05-10 00:32:23", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForAudioFrameClassification", "multi_labels": [ "Language Models", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "HPLT Bert for Japanese This is one of the encoder-only monolingual language models trained as a first release by the HPLT project.", "url": "https://huggingface.co/HPLT/hplt_bert_base_ja", "project_name": "hplt_bert_base_ja", "downloads": 142, "source": "Hugging Face", "score": -0.05109687887458369, "first_commit": "2024-04-22 01:23:46", "latest_commit": "2024-07-11 11:36:10", "languages": [], "model_or_dataset": "model", "model_size": 0.128, "model_architectures": "LtgbertForMaskedLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "COMET-GPT2 ja v2 Finetuned GPT-2 on the large version of ATOMIC ja using a causal language modeling (CLM) objective.", "url": "https://huggingface.co/nlp-waseda/comet-v2-gpt2-small-japanese", "project_name": "comet-v2-gpt2-small-japanese", "downloads": 141, "source": "Hugging Face", "score": 
-0.0511039780002782, "first_commit": "2023-03-05 13:39:03", "latest_commit": "2023-03-14 16:56:19", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Dialogue Response Generation", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Model Base model: microsoft/Phi-3-mini-4k-instruct Training dataset: llm-jp/hh-rlhf-12k-ja Training method: full-parameter tuning Sample: import torch from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained( \"ryota39/Phi-3-mini-4k-instruct-dpo\", trust_remote_code=True, ) model = AutoModelForCausalLM.from_pretrained( \"ryota39/Phi-3-mini-4k-instruct-dpo\", device_map=\"auto\", torch_dtype='auto', trust_remote_code=True, ) text = \"<|user|>\\nThink through the given question in English and answer in Japanese.", "url": "https://huggingface.co/eliashasnat/phi-3", "project_name": "phi-3", "downloads": 141, "source": "Hugging Face", "score": -0.0511039780002782, "first_commit": "2024-05-08 06:05:02", "latest_commit": "2024-05-08 15:48:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Phi3ForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Description This is a WIP dataset!", "url": "https://huggingface.co/datasets/morinoko-inari/ruby-rails-ja-en", "project_name": "ruby-rails-ja-en", "downloads": 141, "source": "Hugging Face", "score": -0.0511039780002782, "first_commit": "2025-04-12 20:15:35", "latest_commit": "2025-04-20 14:34:41", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "YakugakuQA YakugakuQA is a question answering dataset, consisting of 13 years (2012-2024)", "url": "https://huggingface.co/datasets/EQUES/YakugakuQA", "project_name": "YakugakuQA", "downloads": 141, "source": "Hugging Face", "score": -0.0511039780002782, "first_commit": "2024-09-09 02:35:20", "latest_commit": "2024-12-10 06:10:51", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Question Answering", "Annotation and Dataset Development" ] }, { "description": "Fish Speech V1.2 Fish Speech V1.2 is a leading text-to-speech (TTS) model trained on 300k hours of English, Chinese, and Japanese audio data.", "url": "https://huggingface.co/fishaudio/fish-speech-1.2", "project_name": "fish-speech-1.2", "downloads": 140, "source": "Hugging Face", "score": -0.05111107712597271, "first_commit": "2024-07-02 04:24:09", "latest_commit": "2024-07-02 04:31:26", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality" ] }, { "description": "Fine-tuned Japanese Whisper model for speech recognition using whisper-base Fine-tuned openai/whisper-base on Japanese using Common Voice, JVS and JSUT.", "url": "https://huggingface.co/Ivydata/whisper-base-japanese", "project_name": "whisper-base-japanese", "downloads": 139, "source": "Hugging Face", "score": -0.05111817625166722, "first_commit": "2023-05-17 04:36:41", "latest_commit": "2023-06-08 00:17:50", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [ "Language Models",
"Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "nlp-waseda/roberta-large-japanese-with-auto-jumanpp Model description", "url": "https://huggingface.co/nlp-waseda/roberta-large-japanese-with-auto-jumanpp", "project_name": "roberta-large-japanese-with-auto-jumanpp", "downloads": 139, "source": "Hugging Face", "score": -0.05111817625166722, "first_commit": "2022-10-15 05:40:40", "latest_commit": "2022-10-21 15:55:27", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "output 筑波 2.0035860538482666 つくば 1.6586617231369019 研究 1.6227693557739258 大学 1.3798155784606934 実験 0.5522942543029785 学生 0.42351895570755005 分析 0.37844282388687134 国立 0.3685397505760193 キャンパス 0.36495038866996765 茨城 0.3056415021419525 科学 0.2876652181148529 関東 0.24301066994667053 地域 0.21340851485729218 実施 0.1976248174905777 先端 0.192025288939476 サイト 0.11629197001457214 調査 0.09159307181835175 プロジェクト 0.08552580326795578 議論 0.07484486699104309 検討 0.007034890353679657", "url": "https://huggingface.co/aken12/splade-japanese-efficient", "project_name": "splade-japanese-efficient", "downloads": 139, "source": "Hugging Face", "score": -0.05111817625166722, "first_commit": "2024-03-11 03:02:28", "latest_commit": "2024-03-16 16:27:35", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "モデルについて Qwen/Qwen1.5-0.5Bを日英データ5Bトークンで継続事前学習したTokara-0.5B-v0.1を日本語instructionデータセットでファインチューニングしたモデルです。 ", "url": "https://huggingface.co/Kendamarron/Tokara-0.5B-Chat-dolly-jimba", "project_name": "Tokara-0.5B-Chat-dolly-jimba", "downloads": 139, "source": "Hugging Face", "score": -0.05111817625166722, "first_commit": "2024-05-06 15:24:56", "latest_commit": "2024-05-08 13:30:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese-Heron-Bench Dataset Description Japanese-Heron-Bench is a benchmark for evaluating Japanese VLMs (Vision-Language Models).", "url": "https://huggingface.co/datasets/turing-motors/Japanese-Heron-Bench", "project_name": "Japanese-Heron-Bench", "downloads": 138, "source": "Hugging Face", "score": -0.05112527537736174, "first_commit": "2024-04-12 01:54:01", "latest_commit": "2024-04-12 08:59:36", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "MobileBERT 日本語事前学習済みモデル爆誕!! 
", "url": "https://huggingface.co/ysakuramoto/mobilebert-ja", "project_name": "mobilebert-ja", "downloads": 137, "source": "Hugging Face", "score": -0.051132374503056244, "first_commit": "2022-01-23 11:29:39", "latest_commit": "2022-01-24 05:25:31", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Information Retrieval", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "DeepSeek-R1-Distill-Llama-8B-gguf deepseek-aiさんが公開しているDeepSeek-R1-Distill-Llama-8Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/DeepSeek-R1-Distill-Llama-8B-gguf", "project_name": "DeepSeek-R1-Distill-Llama-8B-gguf", "downloads": 137, "source": "Hugging Face", "score": -0.051132374503056244, "first_commit": "2025-01-20 16:10:16", "latest_commit": "2025-01-20 18:25:52", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "stockmark-100b-gguf stockmarkさんが公開しているstockmark-100bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/stockmark-100b-gguf", "project_name": "stockmark-100b-gguf", "downloads": 137, "source": "Hugging Face", "score": -0.051132374503056244, "first_commit": "2024-05-17 12:45:56", "latest_commit": "2024-05-18 09:14:46", "languages": [], "model_or_dataset": "model", "model_size": 96.2, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "This repository contains some GGUF quantizations of the VNTL Gemma 2 27B model.", "url": "https://huggingface.co/lmg-anon/vntl-gemma2-27b-gguf", "project_name": "vntl-gemma2-27b-gguf", "downloads": 136, "source": "Hugging Face", "score": -0.05113947362875076, "first_commit": "2024-07-07 00:28:06", "latest_commit": "2024-07-08 16:13:54", "languages": [], "model_or_dataset": "model", "model_size": 27.2, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Language Models" ] }, { "description": "モデルについて Qwen/Qwen1.5-0.5Bを日英データ5Bトークンで継続事前学習したモデルです。 ", "url": "https://huggingface.co/Kendamarron/Tokara-0.5B-v0.1", "project_name": "Tokara-0.5B-v0.1", "downloads": 136, "source": "Hugging Face", "score": -0.05113947362875076, "first_commit": "2024-05-06 11:39:26", "latest_commit": "2024-05-08 12:44:05", "languages": [], "model_or_dataset": "model", "model_size": 0.464, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "whisper-large-v2-japanese-5k-steps This model is a fine-tuned version of openai/whisper-large-v2 on the Japanese CommonVoice dataset (v11)..", "url": "https://huggingface.co/clu-ling/whisper-large-v2-japanese-5k-steps", "project_name": "whisper-large-v2-japanese-5k-steps", "downloads": 135, "source": "Hugging Face", "score": -0.051146572754445264, "first_commit": "2023-01-28 22:14:29", "latest_commit": "2023-03-03 21:11:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [] }, { "description": "AIBunCho/japanese-novel-gpt-j-6b AI BunChoで利用しているモデルです。", "url": "https://huggingface.co/AIBunCho/japanese-novel-gpt-j-6b", "project_name": "japanese-novel-gpt-j-6b", "downloads": 135, "source": "Hugging Face", 
"score": -0.051146572754445264, "first_commit": "2023-08-11 00:52:32", "latest_commit": "2023-08-26 04:20:51", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTJForCausalLM", "multi_labels": [ "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "wanabi-24B (preview) wanabi-24B は、小説執筆支援に特化してファインチューニングされた大規模言語モデルの プレビュー版 (preview) です。 ", "url": "https://huggingface.co/kawaimasa/wanabi_24b_preview_gguf", "project_name": "wanabi_24b_preview_gguf", "downloads": 134, "source": "Hugging Face", "score": -0.05115367188013978, "first_commit": "2025-04-23 11:23:04", "latest_commit": "2025-04-27 06:18:11", "languages": [], "model_or_dataset": "model", "model_size": 23.6, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-70b-instruct-v0.1", "project_name": "Swallow-70b-instruct-v0.1", "downloads": 134, "source": "Hugging Face", "score": -0.05115367188013978, "first_commit": "2024-03-06 14:39:34", "latest_commit": "2024-06-29 09:00:17", "languages": [], "model_or_dataset": "model", "model_size": 69.2, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "CC-news-2024-July-October-cleaned このデータセットはCommon Crawlのnewsサブセットから作成した2024年7月から10月の日本語のニュースの文章が収録されています。 ", "url": "https://huggingface.co/datasets/kajuma/CC-news-2024-July-October-cleaned", "project_name": "CC-news-2024-July-October-cleaned", "downloads": 134, "source": "Hugging Face", "score": -0.05115367188013978, "first_commit": "2024-11-17 13:16:59", "latest_commit": "2024-11-17 15:07:26", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Model Card for Model ID Model Details Model Description", "url": "https://huggingface.co/Ryoma0302/gpt_0.76B_global_step3000_japanese", "project_name": "gpt_0.76B_global_step3000_japanese", "downloads": 133, "source": "Hugging Face", "score": -0.05116077100583429, "first_commit": "2024-04-29 02:24:53", "latest_commit": "2024-04-29 02:56:17", "languages": [], "model_or_dataset": "model", "model_size": 0.732, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "HODACHI-EZO-Humanities-9B-gemma-2-it-gguf HODACHIさんが公開しているEZO-Humanities-9B-gemma-2-itのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/HODACHI-EZO-Humanities-9B-gemma-2-it-gguf", "project_name": "HODACHI-EZO-Humanities-9B-gemma-2-it-gguf", "downloads": 133, "source": "Hugging Face", "score": -0.05116077100583429, "first_commit": "2024-07-15 15:43:00", "latest_commit": "2024-07-15 17:01:09", "languages": [], "model_or_dataset": "model", "model_size": 9.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "🥕 如果兔兔的仓库对你有帮助的话点个⭐喵~ If Tutu's repository is helpful to you, please give it a ⭐ meow~ もしうさぎのリポジトリが役に立った場合は、⭐をぽちっとしてくださいにゃん~ 🍉 任何 ❓", "url": "https://huggingface.co/datasets/MomoyamaSawa/Voice-KusanagiNene", "project_name": "Voice-KusanagiNene", "downloads": 133, "source": "Hugging 
Face", "score": -0.05116077100583429, "first_commit": "2024-01-29 08:17:21", "latest_commit": "2024-01-29 10:51:51", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "range3/wiki40b-ja This dataset consists of three parquet files from the wiki40b dataset with only Japanese data extracted.", "url": "https://huggingface.co/datasets/range3/wiki40b-ja", "project_name": "wiki40b-ja", "downloads": 133, "source": "Hugging Face", "score": -0.05116077100583429, "first_commit": "2023-02-04 04:54:17", "latest_commit": "2023-02-04 05:44:21", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "日本語ウィキペディア入力誤りデータセット (漢字誤変換抽出版) 概要 このデータセットは,京都大学 言語メディア研究室によって公開されているデータセットをHuggingFaceで使用できるよう変換したものです.", "url": "https://huggingface.co/datasets/JunSotohigashi/JapaneseWikipediaTypoDataset_kanji", "project_name": "JapaneseWikipediaTypoDataset_kanji", "downloads": 133, "source": "Hugging Face", "score": -0.05116077100583429, "first_commit": "2025-01-22 07:54:19", "latest_commit": "2025-01-22 08:19:02", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "GitHub リポジトリ singletongue/wikipedia-utils で公開されているデータセットを利用しています。 ", "url": "https://huggingface.co/datasets/llm-book/jawiki-sentences", "project_name": "jawiki-sentences", "downloads": 132, "source": "Hugging Face", "score": -0.0511678701315288, "first_commit": "2023-06-03 03:02:08", "latest_commit": "2023-10-25 15:22:05", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese InstructBLIP Alpha Model Details Japanese InstructBLIP Alpha is a vision-language instruction-following model that enables to generate Japanese descriptions for input images and optionally input texts such as questions.", "url": "https://huggingface.co/stabilityai/japanese-instructblip-alpha", "project_name": "japanese-instructblip-alpha", "downloads": 131, "source": "Hugging Face", "score": -0.05117496925722331, "first_commit": "2023-08-16 23:49:58", "latest_commit": "2023-11-17 03:57:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "JapaneseInstructBlipAlphaForConditionalGeneration", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-base-medium", "project_name": "t5-base-medium", "downloads": 131, "source": "Hugging Face", "score": -0.05117496925722331, "first_commit": "2023-04-26 08:27:09", "latest_commit": "2023-05-10 10:00:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "AtheneX-V2-72B-instruct This is a merge of pre-trained language models created using mergekit. 
", "url": "https://huggingface.co/nitky/AtheneX-V2-72B-instruct", "project_name": "AtheneX-V2-72B-instruct", "downloads": 130, "source": "Hugging Face", "score": -0.05118206838291783, "first_commit": "2024-11-19 14:27:17", "latest_commit": "2024-11-20 08:53:26", "languages": [], "model_or_dataset": "model", "model_size": 72.7, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Dataset used to train Pokémon text to image model, add a Japanese Column of Pokémon BLIP captions BLIP generated captions for Pokémon images from Few Shot Pokémon dataset introduced by Towards Faster and Stabilized GAN Training for High-fidelity Few-shot Image Synthesis (FastGAN).", "url": "https://huggingface.co/datasets/svjack/pokemon-blip-captions-en-ja", "project_name": "pokemon-blip-captions-en-ja", "downloads": 130, "source": "Hugging Face", "score": -0.05118206838291783, "first_commit": "2022-10-29 07:26:57", "latest_commit": "2022-10-31 06:22:04", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Visual Data in NLP", "Captioning", "Text Generation", "Multimodality", "Low-Resource NLP", "Annotation and Dataset Development" ] }, { "description": "japanese-gpt-1b-PII-masking Model Description japanese-gpt-1b-PII-masking は、 日本語事前学習済み1B GPTモデルをベースとして、日本語の文章から個人情報をマスキングするように学習したモデルです。 ", "url": "https://huggingface.co/cameltech/japanese-gpt-1b-PII-masking", "project_name": "japanese-gpt-1b-PII-masking", "downloads": 129, "source": "Hugging Face", "score": -0.051189167508612334, "first_commit": "2024-04-05 07:26:29", "latest_commit": "2024-05-17 11:42:00", "languages": [], "model_or_dataset": "model", "model_size": 1.3, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "WS TCG Card Text Translator A Japanese-English machine translation model specifically trained for translating card text from the Weiss Schwarz (WS) Trading Card Game, fine-tuned on Helsinki-NLP/opus-mt-ja-en.", "url": "https://huggingface.co/eepj/wstcg-mt-ja-en", "project_name": "wstcg-mt-ja-en", "downloads": 129, "source": "Hugging Face", "score": -0.051189167508612334, "first_commit": "2024-04-30 13:20:20", "latest_commit": "2024-05-22 02:45:55", "languages": [], "model_or_dataset": "model", "model_size": 0.07529999999999999, "model_architectures": "MarianMTModel", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "Japanese CLIP ViT-H/14 (Base) Table of Contents Overview Usage Model Details Evaluation Limitations and Biases Citation See Also Contact Information Overview Developed by:", "url": "https://huggingface.co/hakuhodo-tech/japanese-clip-vit-h-14-bert-base", "project_name": "japanese-clip-vit-h-14-bert-base", "downloads": 128, "source": "Hugging Face", "score": -0.05119626663430685, "first_commit": "2024-03-06 03:23:16", "latest_commit": "2024-03-06 21:40:04", "languages": [], "model_or_dataset": "model", "model_size": 0.743, "model_architectures": "CustomCLIPModel", "multi_labels": [ "Representation Learning", "Visual Data in NLP", "Multimodality" ] }, { "description": "cyberagent/calm2-7b-chatの出力を人手でチェック・修正することで作成した日本語Instructionデータセットです。 ", "url": "https://huggingface.co/datasets/Kendamarron/jimba-instuction-1k-beta", 
"project_name": "jimba-instuction-1k-beta", "downloads": 128, "source": "Hugging Face", "score": -0.05119626663430685, "first_commit": "2024-02-29 15:23:48", "latest_commit": "2024-04-25 12:49:28", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "matsuolab-weblab-10b-gguf matsuo-labさんが公開しているweblab-10bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/matsuolab-weblab-10b-gguf", "project_name": "matsuolab-weblab-10b-gguf", "downloads": 127, "source": "Hugging Face", "score": -0.05120336576000136, "first_commit": "2023-08-21 11:18:10", "latest_commit": "2023-09-02 18:15:45", "languages": [], "model_or_dataset": "model", "model_size": 10.7, "model_architectures": null, "multi_labels": [] }, { "description": "Model Card for Model ID 料理を検索するための質問文から、検索検索用キーワードである固有表現を抽出します Model Details Model Description 例えば、「東京の肉料理で、春に食べられる、鶏肉を使った料理を教えてください」という文章を入力すると、 「東京 → 都道府県/地方(AREA)」 「肉料理 → 種類(TYPE)」 「春 → 季節(SZN)", "url": "https://huggingface.co/wolf4032/bert-japanese-token-classification-search-local-cuisine", "project_name": "bert-japanese-token-classification-search-local-cuisine", "downloads": 127, "source": "Hugging Face", "score": -0.05120336576000136, "first_commit": "2024-04-28 06:45:18", "latest_commit": "2024-05-12 07:20:39", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Model Card for Model ID", "url": "https://huggingface.co/kkuramitsu/mt5-mini9L", "project_name": "mt5-mini9L", "downloads": 126, "source": "Hugging Face", "score": -0.05121046488569587, "first_commit": "2023-03-30 01:11:17", "latest_commit": "2023-10-15 10:56:23", "languages": [], "model_or_dataset": "model", "model_size": 0.08040000000000001, "model_architectures": "MT5ForConditionalGeneration", "multi_labels": [ "Multilinguality", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese CLIP ViT-H/14 (Deeper) Table of Contents Overview Usage Model Details Evaluation Limitations and Biases Citation", "url": "https://huggingface.co/hakuhodo-tech/japanese-clip-vit-h-14-bert-deeper", "project_name": "japanese-clip-vit-h-14-bert-deeper", "downloads": 126, "source": "Hugging Face", "score": -0.05121046488569587, "first_commit": "2024-03-06 03:26:35", "latest_commit": "2024-03-06 21:44:31", "languages": [], "model_or_dataset": "model", "model_size": 0.8280000000000001, "model_architectures": "CustomCLIPModel", "multi_labels": [ "Representation Learning", "Visual Data in NLP", "Multimodality" ] }, { "description": "Japanese CLIP ViT-H/14 (Wider) Table of Contents Overview Usage Model Details Evaluation Limitations and Biases Citation See Also Contact Information Overview Developed by:", "url": "https://huggingface.co/hakuhodo-tech/japanese-clip-vit-h-14-bert-wider", "project_name": "japanese-clip-vit-h-14-bert-wider", "downloads": 126, "source": "Hugging Face", "score": -0.05121046488569587, "first_commit": "2024-03-06 03:30:25", "latest_commit": "2024-03-06 21:46:11", "languages": [], "model_or_dataset": "model", "model_size": 0.91, "model_architectures": "CustomCLIPModel", "multi_labels": [ "Representation Learning", "Visual Data in NLP", "Multimodality" ] }, { "description": "Swallow Education Classifier Japanese README Model summary This repository contains fastText classifiers for 
judging the educational value of Japanese web pages.", "url": "https://huggingface.co/tokyotech-llm/edu-classifier", "project_name": "edu-classifier", "downloads": 125, "source": "Hugging Face", "score": -0.05121756401139038, "first_commit": "2024-12-27 06:26:25", "latest_commit": "2025-01-30 14:18:27", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Information Retrieval", "Text Classification" ] }, { "description": "stockmark/stockmark-100b Stockmark-100b is a 100-billion-parameter LLM pretrained from scratch on a Japanese and English corpus of about 910 billion tokens.", "url": "https://huggingface.co/stockmark/stockmark-100b", "project_name": "stockmark-100b", "downloads": 125, "source": "Hugging Face", "score": -0.05121756401139038, "first_commit": "2024-05-13 09:31:40", "latest_commit": "2024-05-15 06:18:10", "languages": [], "model_or_dataset": "model", "model_size": 96.2, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "llm-jp-3-7.2b-instruct This repository provides large language models developed by the Research and Development Center for Large Language Models at the National Institute of Informatics.", "url": "https://huggingface.co/llm-jp/llm-jp-3-7.2b-instruct", "project_name": "llm-jp-3-7.2b-instruct", "downloads": 124, "source": "Hugging Face", "score": -0.05122466313708489, "first_commit": "2024-12-02 02:02:31", "latest_commit": "2025-01-31 10:50:54", "languages": [], "model_or_dataset": "model", "model_size": 7.29, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Developed by: SANJAY S License: MIT Finetuned from model: Meta's LLAMA 3.2 (3B)", "url": "https://huggingface.co/ssanjay22/japanese_partner", "project_name": "japanese_partner", "downloads": 124, "source": "Hugging Face", "score": -0.05122466313708489, "first_commit": "2024-12-06 17:42:47", "latest_commit": "2024-12-07 13:23:36", "languages": [], "model_or_dataset": "model", "model_size": 3.21, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "databricks-dolly-15k-ja This repository provides an instruction tuning dataset developed by LLM-jp, a collaborative project launched in Japan.", "url": "https://huggingface.co/datasets/llm-jp/databricks-dolly-15k-ja", "project_name": "databricks-dolly-15k-ja", "downloads": 124, "source": "Hugging Face", "score": -0.05122466313708489, "first_commit": "2024-01-27 07:11:25", "latest_commit": "2024-01-30 18:09:37", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "Summary This is a LLaMA 3 Youko qlora fine-tune, created using a new version of the VNTL dataset.", "url": "https://huggingface.co/lmg-anon/vntl-llama3-8b-v2-gguf", "project_name": "vntl-llama3-8b-v2-gguf", "downloads": 123, "source": "Hugging Face", "score": -0.0512317622627794, "first_commit": "2025-01-02 11:48:03", "latest_commit": "2025-01-02 11:59:48", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [] }, { "description": "Cross-Encoder for Natural Language Inference (NLI) for Japanese This model was trained using the SentenceTransformers Cross-Encoder
class.", "url": "https://huggingface.co/akiFQC/bert-base-japanese-v3_nli-jsnli-jnli-jsick", "project_name": "bert-base-japanese-v3_nli-jsnli-jnli-jsick", "downloads": 123, "source": "Hugging Face", "score": -0.0512317622627794, "first_commit": "2024-04-26 05:15:05", "latest_commit": "2024-04-26 06:02:55", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "MashiroSA/sovits-emu-dataset A voice dataset collected from Project Sekai charactor Emu Otori Introduction Size: 2735, all WAV format.", "url": "https://huggingface.co/datasets/Emu-Academic/pjsk-emu-dataset", "project_name": "pjsk-emu-dataset", "downloads": 123, "source": "Hugging Face", "score": -0.0512317622627794, "first_commit": "2023-04-07 00:48:45", "latest_commit": "2024-07-16 23:02:04", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", "url": "https://huggingface.co/stabilityai/japanese-stable-vlm", "project_name": "japanese-stable-vlm", "downloads": 122, "source": "Hugging Face", "score": -0.051238861388473916, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 7.57, "model_architectures": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Natural Language Interfaces", "Ethical NLP", "Dialogue Systems & Conversational Agents" ] }, { "description": "Local-Novel-LLM-project様の Assistance をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Assistance-7B-GGUF", "project_name": "Assistance-7B-GGUF", "downloads": 122, "source": "Hugging Face", "score": -0.051238861388473916, "first_commit": "2024-05-03 12:16:29", "latest_commit": "2024-05-04 07:48:41", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Llama3ベースの日本語医療LLM MedLlama3-JP このモデルはLlama3の継続学習により作成された4種類のLLMから成るマージモデルです。 ", "url": "https://huggingface.co/EQUES/MedLLama3-JP-v2", "project_name": "MedLLama3-JP-v2", "downloads": 122, "source": "Hugging Face", "score": -0.051238861388473916, "first_commit": "2024-07-01 13:42:17", "latest_commit": "2024-07-13 06:12:43", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Synthetic-JP-EN-Coding-Dataset This repository provides an instruction tuning dataset developed by LLM-jp, a collaborative project launched in Japan.", "url": "https://huggingface.co/datasets/llm-jp/Synthetic-JP-EN-Coding-Dataset", "project_name": "Synthetic-JP-EN-Coding-Dataset", "downloads": 122, "source": "Hugging Face", "score": -0.051238861388473916, "first_commit": "2025-01-31 05:50:10", "latest_commit": "2025-01-31 06:35:59", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Ninja-v1-RP-expressive-GGUF 概要 Aratako/Ninja-v1-RP-expressive-breadcrumbsの量子化済みGGUF版です。", "url": 
"https://huggingface.co/Aratako/Ninja-v1-RP-expressive-breadcrumbs-GGUF", "project_name": "Ninja-v1-RP-expressive-breadcrumbs-GGUF", "downloads": 121, "source": "Hugging Face", "score": -0.05124596051416842, "first_commit": "2024-05-26 13:46:39", "latest_commit": "2024-06-01 11:55:08", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "magpie-sft-v1.0-dpo-judged 概要 llm-jp/magpie-sft-v1.0を元に以下のような改変を加えて作成した日本語Preferenceデータセットです。 ", "url": "https://huggingface.co/datasets/Aratako/magpie-sft-v1.0-dpo-judged", "project_name": "magpie-sft-v1.0-dpo-judged", "downloads": 120, "source": "Hugging Face", "score": -0.05125305963986294, "first_commit": "2024-11-27 17:37:12", "latest_commit": "2024-12-15 05:36:57", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Speech-Translation-Instructions The instructions translated from 120 languages Common Voice to english, arabic, japanese, mandarin and french from common voice speech dataset.", "url": "https://huggingface.co/datasets/mesolitica/Speech-Translation-Instructions", "project_name": "Speech-Translation-Instructions", "downloads": 120, "source": "Hugging Face", "score": -0.05125305963986294, "first_commit": "2025-03-30 14:18:00", "latest_commit": "2025-04-01 06:13:50", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "c4ai-command-r-v01-japanese-instruct GGUF版はこちら/Click here for the GGUF version 概要 CohereForAI/c4ai-command-r-v01を、ichikara-instructionを使って追加で日本語インストラクションチューニングを施したモデルです。 ", "url": "https://huggingface.co/Aratako/c4ai-command-r-v01-japanese-instruct", "project_name": "c4ai-command-r-v01-japanese-instruct", "downloads": 117, "source": "Hugging Face", "score": -0.05127435701694647, "first_commit": "2024-04-04 03:56:52", "latest_commit": "2024-04-07 15:18:37", "languages": [], "model_or_dataset": "model", "model_size": 35.0, "model_architectures": "CohereForCausalLM", "multi_labels": [ "Natural Language Interfaces", "Language Models" ] }, { "description": "HODACHI様の EZO-Common-T2-2B-gemma-2-it をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/EZO-Common-T2-2B-gemma-2-it-GGUF", "project_name": "EZO-Common-T2-2B-gemma-2-it-GGUF", "downloads": 117, "source": "Hugging Face", "score": -0.05127435701694647, "first_commit": "2024-08-01 11:38:48", "latest_commit": "2024-08-01 13:42:20", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "QuantFactory/shisa-gamma-7b-v1-GGUF", "url": "https://huggingface.co/QuantFactory/shisa-gamma-7b-v1-GGUF", "project_name": "shisa-gamma-7b-v1-GGUF", "downloads": 117, "source": "Hugging Face", "score": -0.05127435701694647, "first_commit": "2024-06-12 17:16:36", "latest_commit": "2024-06-18 06:17:30", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": 
"以下のデータ源からランダムに抽出したテキストをもとに、phi3で再生成した文章です。 Wikibooks Wikipedia Cosmopedia 判例データ データ parquetファイルが数十GB程度あります datasetsライブラリからでは、はじめの数GB程度しか読み込めない可能性があります。git lfsなどでダウンロードする必要がありそうです。 コード こちら 一部の計算には東京工業大学のスーパーコンピュータTSUBAME4.0を利用しました。", "url": "https://huggingface.co/datasets/kanhatakeyama/SyntheticText", "project_name": "SyntheticText", "downloads": 117, "source": "Hugging Face", "score": -0.05127435701694647, "first_commit": "2024-06-04 04:56:13", "latest_commit": "2024-07-16 07:30:43", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "読み込み方 from datasets import load_dataset dataset = load_dataset(\"YANS-official/senryu-test\", split=\"test\") 概要 川柳投稿サイトの『写真川柳』と『川柳投稿まるせん』のクロールデータです。 ", "url": "https://huggingface.co/datasets/YANS-official/senryu-test-with-references", "project_name": "senryu-test-with-references", "downloads": 117, "source": "Hugging Face", "score": -0.05127435701694647, "first_commit": "2024-08-28 18:47:38", "latest_commit": "2024-08-31 15:07:22", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Text Generation", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "ichikara-instruction (Non Commercial) LLMのための日本語インストラクションデータ 公開ページ 公開ページより、 本データに関して、言語処理学会第30回年次大会において発表を行います。", "url": "https://huggingface.co/datasets/p1atdev/ichikara-instruction", "project_name": "ichikara-instruction", "downloads": 117, "source": "Hugging Face", "score": -0.05127435701694647, "first_commit": "2024-03-12 07:09:56", "latest_commit": "2024-03-12 08:36:40", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "RakutenAI-2.0-mini Model Description RakutenAI-2.0-mini is a lightweight Japanese language model trained from scratch using a transformer architecture, designed for efficient performance in resource-constrained environments.", "url": "https://huggingface.co/Rakuten/RakutenAI-2.0-mini", "project_name": "RakutenAI-2.0-mini", "downloads": 116, "source": "Hugging Face", "score": -0.051281456142640985, "first_commit": "2025-02-05 06:17:58", "latest_commit": "2025-02-10 08:41:10", "languages": [], "model_or_dataset": "model", "model_size": 1.53, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese Parler-TTS Large (β版) このリポジトリは、parler-tts/parler-tts-large-v1を基に、日本語でのテキスト読み上げを可能にするよう再学習したモデルを公開しています。", "url": "https://huggingface.co/2121-8/japanese-parler-tts-large-bate", "project_name": "japanese-parler-tts-large-bate", "downloads": 114, "source": "Hugging Face", "score": -0.051295654394030006, "first_commit": "2024-11-19 04:48:45", "latest_commit": "2024-12-05 08:27:28", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ParlerTTSForConditionalGeneration", "multi_labels": [] }, { "description": "Japanese Laws This dataset comprises 8.75K law records retrieved from the official Japanese government website e-Gov. 
", "url": "https://huggingface.co/datasets/y2lan/japan-law", "project_name": "japan-law", "downloads": 114, "source": "Hugging Face", "score": -0.051295654394030006, "first_commit": "2023-07-20 06:26:25", "latest_commit": "2023-07-20 06:45:14", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Linguistic Theories", "Annotation and Dataset Development" ] }, { "description": "AugeoBench AugeoBench is a multimodal QA benchmark consisting of Japanese entrance-exam-style geometry questions.", "url": "https://huggingface.co/datasets/Silviase/augeobench", "project_name": "augeobench", "downloads": 114, "source": "Hugging Face", "score": -0.051295654394030006, "first_commit": "2025-04-07 13:50:23", "latest_commit": "2025-04-07 13:52:01", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Visual Data in NLP", "Question Answering", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "neody/oscar-ja-cleanedのデータの中から256文字以下のものを抽出しました。", "url": "https://huggingface.co/datasets/noname0202/oscar-cleaned-256", "project_name": "oscar-cleaned-256", "downloads": 114, "source": "Hugging Face", "score": -0.051295654394030006, "first_commit": "2024-12-15 12:19:36", "latest_commit": "2024-12-19 11:23:05", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Qwen2.5 Bakeneko 32B Instruct GPTQ int8 (rinna/qwen2.5-bakeneko-32b-instruct-gptq-int8)", "url": "https://huggingface.co/rinna/qwen2.5-bakeneko-32b-instruct-gptq-int8", "project_name": "qwen2.5-bakeneko-32b-instruct-gptq-int8", "downloads": 113, "source": "Hugging Face", "score": -0.05130275351972451, "first_commit": "2025-02-12 08:41:59", "latest_commit": "2025-02-14 04:49:56", "languages": [], "model_or_dataset": "model", "model_size": 9.67, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Language Models", "Green & Sustainable NLP", "Semantic Text Processing" ] }, { "description": "HODACHI様の Llama-3.1-8B-EZO-1.1-it をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Llama-3.1-8B-EZO-1.1-it-GGUF", "project_name": "Llama-3.1-8B-EZO-1.1-it-GGUF", "downloads": 112, "source": "Hugging Face", "score": -0.05130985264541903, "first_commit": "2024-07-31 12:12:01", "latest_commit": "2024-07-31 18:13:59", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Dataset Summary SNOW T15:The simplified corpus for the Japanese language.", "url": "https://huggingface.co/datasets/SNOW-NLP/snow_simplified_japanese_corpus", "project_name": "snow_simplified_japanese_corpus", "downloads": 112, "source": "Hugging Face", "score": -0.05130985264541903, "first_commit": "2022-01-25 16:36:23", "latest_commit": "2024-01-18 11:16:01", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Paraphrasing", "Text Generation", "Machine Translation", "Language Models", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "Ninja-v1-RP-expressive-GGUF 概要 Aratako/Oumuamua-7b-RPの量子化済みGGUF版です。", "url": 
"https://huggingface.co/Aratako/Oumuamua-7b-RP-GGUF", "project_name": "Oumuamua-7b-RP-GGUF", "downloads": 111, "source": "Hugging Face", "score": -0.05131695177111354, "first_commit": "2024-06-23 13:00:02", "latest_commit": "2024-06-23 14:45:14", "languages": [], "model_or_dataset": "model", "model_size": 7.33, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "📰 News", "url": "https://huggingface.co/datasets/tokyotech-llm/swallow-magpie-ultra-v0.1", "project_name": "swallow-magpie-ultra-v0.1", "downloads": 111, "source": "Hugging Face", "score": -0.05131695177111354, "first_commit": "2024-12-05 12:40:04", "latest_commit": "2025-01-07 14:26:51", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "CommonCrawl Japanese (Filtered PPI) Dataset 本データセットは、CommonCrawlより抽出した約100億(10B)トークン規模の日本語テキストデータから、特に配慮が必要な「要配慮個人情報」をフィルタリング処理したものです。 ", "url": "https://huggingface.co/datasets/matsuo-lab/JP-LLM-Corpus-PII-Filtered-10B", "project_name": "JP-LLM-Corpus-PII-Filtered-10B", "downloads": 111, "source": "Hugging Face", "score": -0.05131695177111354, "first_commit": "2025-03-25 07:12:24", "latest_commit": "2025-03-26 05:34:12", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "GitHub リポジトリ singletongue/wikipedia-utils で公開されているデータセットを利用しています。 ", "url": "https://huggingface.co/datasets/llm-book/jawiki-paragraphs", "project_name": "jawiki-paragraphs", "downloads": 110, "source": "Hugging Face", "score": -0.05132405089680805, "first_commit": "2023-06-03 03:04:05", "latest_commit": "2023-06-03 03:04:43", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "読み込み方 from datasets import load_dataset dataset = load_dataset(\"YANS-official/ogiri-test\", split=\"test\") 概要 大喜利投稿サイトBoketeのクロールデータです。", "url": "https://huggingface.co/datasets/YANS-official/ogiri-test", "project_name": "ogiri-test", "downloads": 110, "source": "Hugging Face", "score": -0.05132405089680805, "first_commit": "2024-09-03 15:08:05", "latest_commit": "2024-09-09 05:53:54", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Text Generation", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "モデル概要 このモデルは、 Twitter/twhin-bert-large をSNS上のコメントに人手で攻撃性評価を行ったデータセットでFine-tuningすることで作成しました", "url": "https://huggingface.co/TomokiFujihara/twhin-bert-large-japanese-offensiveness-estimation", "project_name": "twhin-bert-large-japanese-offensiveness-estimation", "downloads": 109, "source": "Hugging Face", "score": -0.05133115002250256, "first_commit": "2024-03-24 10:28:39", "latest_commit": "2024-03-24 16:46:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "OffensivenessEstimationModel", "multi_labels": [ "Responsible & Trustworthy NLP", "Ethical NLP", "Language Models", "Semantic Text Processing" ] }, { "description": "llm-jp-corpus-v3のkakenサブセット中の日本語テキストを、Qwen/Qwen2.5-32B-Instructを用いて日本語から英語に翻訳したデータセットです。 ", "url": 
"https://huggingface.co/datasets/hpprc/kaken-trans-ja-en", "project_name": "kaken-trans-ja-en", "downloads": 109, "source": "Hugging Face", "score": -0.05133115002250256, "first_commit": "2024-11-29 13:02:31", "latest_commit": "2025-01-09 04:09:18", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese Creativity Questions (JCQ) Dataset Description JCQは創造性を評価するための7タスク、各100問からなる日本語のデータセットです。", "url": "https://huggingface.co/datasets/nlp-waseda/JCQ", "project_name": "JCQ", "downloads": 109, "source": "Hugging Face", "score": -0.05133115002250256, "first_commit": "2025-03-13 05:15:12", "latest_commit": "2025-03-17 05:23:19", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "Japanese ELECTRA-small We provide a Japanese ELECTRA-Small model, as described in ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators.", "url": "https://huggingface.co/cinmodel/electra-small-japanese-discriminator", "project_name": "electra-small-japanese-discriminator", "downloads": 108, "source": "Hugging Face", "score": -0.051338249148197075, "first_commit": "2020-11-13 06:49:25", "latest_commit": "2020-12-11 22:26:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForPreTraining", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models" ] }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/OrionStarAI/Orion-14B-Base-Int4", "project_name": "Orion-14B-Base-Int4", "downloads": 108, "source": "Hugging Face", "score": -0.051338249148197075, "first_commit": "2024-01-18 09:50:31", "latest_commit": "2024-03-26 09:55:37", "languages": [], "model_or_dataset": "model", "model_size": 2.69, "model_architectures": "OrionForCausalLM", "multi_labels": [ "Multilinguality", "Language Models" ] }, { "description": "Manga OCR Optical character recognition for Japanese text, with the main focus being Japanese manga.", "url": "https://huggingface.co/Superd4/lasttest", "project_name": "lasttest", "downloads": 108, "source": "Hugging Face", "score": -0.051338249148197075, "first_commit": "2024-04-22 14:58:07", "latest_commit": "2024-04-22 14:58:32", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "VisionEncoderDecoderModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Llama-3-8B-Japanese-Instruct-GGUF Original Model haqishen/Llama-3-8B-Japanese-Instruct Run with LlamaEdge LlamaEdge version: v0.10.1 and above Prompt template Prompt type: llama-3-chat Prompt string <|begin_of_text|><|start_header_id|>system<|end_header_id|> {{ system_prompt }}<|eot_id|><|start_header_id|>user<|end_header_id|> {{ user_message_1 }}<|eot_id|><|start_header_id|>assistant<|end_header_id|> {{ model_answer_1 }}<|eot_id|><|start_header", "url": "https://huggingface.co/second-state/Llama-3-8B-Japanese-Instruct-GGUF", "project_name": "Llama-3-8B-Japanese-Instruct-GGUF", "downloads": 108, "source": "Hugging Face", "score": -0.051338249148197075, "first_commit": "2024-05-14 05:37:53", "latest_commit": "2024-05-14 06:42:38", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset 
Development" ] }, { "description": "QuantFactory/llama-3-youko-8b-GGUF", "url": "https://huggingface.co/QuantFactory/llama-3-youko-8b-GGUF", "project_name": "llama-3-youko-8b-GGUF", "downloads": 108, "source": "Hugging Face", "score": -0.051338249148197075, "first_commit": "2024-06-24 05:04:12", "latest_commit": "2024-06-24 06:35:40", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "A Japanese named entity recognition dataset built from Wikipedia. GitHub: https://github.com/stockmarkteam/ner-wikipedia-dataset/ LICENSE: CC-BY-SA 3.0 Developed by Stockmark Inc.", "url": "https://huggingface.co/datasets/stockmark/ner-wikipedia-dataset", "project_name": "ner-wikipedia-dataset", "downloads": 108, "source": "Hugging Face", "score": -0.051338249148197075, "first_commit": "2023-09-02 14:38:55", "latest_commit": "2023-09-02 14:42:18", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Named Entity Recognition", "Annotation and Dataset Development" ] }, { "description": "This is a handmade dataset for making a Japanese chatbot.", "url": "https://huggingface.co/datasets/shi3z/rachel", "project_name": "rachel", "downloads": 108, "source": "Hugging Face", "score": -0.051338249148197075, "first_commit": "2023-06-01 00:02:31", "latest_commit": "2023-06-01 23:28:53", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "yacis-electra-small-cyberbullying", "url": "https://huggingface.co/ptaszynski/yacis-electra-small-japanese-cyberbullying", "project_name": "yacis-electra-small-japanese-cyberbullying", "downloads": 107, "source": "Hugging Face", "score": -0.05134534827389158, "first_commit": "2022-01-12 03:57:13", "latest_commit": "2022-01-16 13:51:28", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForSequenceClassification", "multi_labels": [] }, { "description": "Dataset Preprocessing Supported Tasks and Leaderboards Languages All annotations use Japanese as the primary language. ", "url": "https://huggingface.co/datasets/shunk031/jsnli", "project_name": "jsnli", "downloads": 107, "source": "Hugging Face", "score": -0.05134534827389158, "first_commit": "2022-12-01 01:31:32", "latest_commit": "2022-12-12 16:36:58", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Reasoning", "Textual Inference", "Annotation and Dataset Development" ] }, { "description": "roberta_qa_japanese (Japanese caption: a model for Japanese (extractive) question answering)", "url": "https://huggingface.co/tsmatz/roberta_qa_japanese", "project_name": "roberta_qa_japanese", "downloads": 106, "source": "Hugging Face", "score": -0.051352447399586096, "first_commit": "2022-12-11 03:41:07", "latest_commit": "2024-07-12 00:00:07", "languages": [], "model_or_dataset": "model", "model_size": 0.11, "model_architectures": "RobertaForQuestionAnswering", "multi_labels": [ "Natural Language Interfaces", "Question Answering", "Captioning", "Text Generation", "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "By clicking \"Agree\", you agree to the License Agreement and
acknowledge Stability AI's Privacy Policy.", "url": "https://huggingface.co/stabilityai/japanese-stable-diffusion-xl", "project_name": "japanese-stable-diffusion-xl", "downloads": 106, "source": "Hugging Face", "score": -0.051352447399586096, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Natural Language Interfaces", "Ethical NLP", "Dialogue Systems & Conversational Agents" ] }, { "description": "Model overview: This model was created by fine-tuning studio-ousia/luke-japanese-large-lite on a dataset of social media comments manually annotated for offensiveness. ", "url": "https://huggingface.co/TomokiFujihara/luke-japanese-large-lite-offensiveness-estimation", "project_name": "luke-japanese-large-lite-offensiveness-estimation", "downloads": 106, "source": "Hugging Face", "score": -0.051352447399586096, "first_commit": "2024-03-24 10:47:00", "latest_commit": "2024-03-25 01:16:47", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "OffensivenessEstimationModel", "multi_labels": [ "Responsible & Trustworthy NLP", "Ethical NLP", "Language Models", "Semantic Text Processing" ] }, { "description": "Model overview: This model was created by fine-tuning Twitter/twhin-bert-base on a dataset of social media comments manually annotated for offensiveness", "url": "https://huggingface.co/TomokiFujihara/twhin-bert-base-japanese-offensiveness-estimation", "project_name": "twhin-bert-base-japanese-offensiveness-estimation", "downloads": 106, "source": "Hugging Face", "score": -0.051352447399586096, "first_commit": "2024-03-24 10:15:19", "latest_commit": "2024-03-24 16:05:46", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "OffensivenessEstimationModel", "multi_labels": [ "Responsible & Trustworthy NLP", "Ethical NLP", "Language Models", "Semantic Text Processing" ] }, { "description": "Translation Task Thinking Test Model Model Description", "url": "https://huggingface.co/dahara1/translate-task-thinking-test", "project_name": "translate-task-thinking-test", "downloads": 106, "source": "Hugging Face", "score": -0.051352447399586096, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ScreenTalk_JA ScreenTalk_JA is a paired dataset of Japanese speech and translated Chinese text released by DataLabX.", "url": "https://huggingface.co/datasets/DataLabX/ScreenTalk_JA2ZH-XS", "project_name": "ScreenTalk_JA2ZH-XS", "downloads": 106, "source": "Hugging Face", "score": -0.051352447399586096, "first_commit": "2025-04-18 23:05:35", "latest_commit": "2025-04-19 14:57:00", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/OrionStarAI/Orion-14B-Chat-Int4", "project_name": "Orion-14B-Chat-Int4", "downloads": 105, "source": "Hugging Face", "score": -0.05135954652528061, "first_commit": "2024-01-18 09:54:07", "latest_commit": "2024-03-26 10:04:46", "languages": [], "model_or_dataset": "model", "model_size": 2.69, "model_architectures": "OrionForCausalLM", "multi_labels": [
"Multilinguality", "Language Models" ] }, { "description": "このモデルはluke-japanese-base-liteをファインチューニングして、Question-Answeringに用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-base-finetuned-QA", "project_name": "luke-japanese-base-finetuned-QA", "downloads": 104, "source": "Hugging Face", "score": -0.05136664565097512, "first_commit": "2023-01-15 23:38:30", "latest_commit": "2023-07-21 14:11:02", "languages": [], "model_or_dataset": "model", "model_size": 0.132, "model_architectures": "LukeForQuestionAnswering", "multi_labels": [ "Natural Language Interfaces", "Question Answering", "Language Models", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "Malum-230 Description Malum-230 is a meticulously handcrafted Japanese dataset featuring multi-turn conversations and passages, specifically designed for logical reasoning tasks.", "url": "https://huggingface.co/datasets/Manual-Dataset-Creation-Project/Malum-230", "project_name": "Malum-230", "downloads": 103, "source": "Hugging Face", "score": -0.05137374477666963, "first_commit": "2024-12-26 23:27:07", "latest_commit": "2025-01-05 20:33:36", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Reasoning", "Annotation and Dataset Development" ] }, { "description": "This dataset was created by automatically translating \"OpenAssistant/oasst1\" into Japanese.", "url": "https://huggingface.co/datasets/kunishou/oasst1-89k-ja", "project_name": "oasst1-89k-ja", "downloads": 103, "source": "Hugging Face", "score": -0.05137374477666963, "first_commit": "2023-05-06 09:12:30", "latest_commit": "2024-04-01 17:15:31", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "簡単な算数問題を解けるように GRPO で学習してみた。", "url": "https://huggingface.co/p1atdev/qwen2.5-0.5b-grpo-math-01", "project_name": "qwen2.5-0.5b-grpo-math-01", "downloads": 102, "source": "Hugging Face", "score": -0.05138084390236414, "first_commit": "2025-02-02 14:52:44", "latest_commit": "2025-02-06 16:28:10", "languages": [], "model_or_dataset": "model", "model_size": 0.494, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Natural Language Interfaces", "Low-Resource NLP", "Annotation and Dataset Development" ] }, { "description": "Japanese Stock Comment Sentiment Model", "url": "https://huggingface.co/c299m/japanese_stock_sentiment", "project_name": "japanese_stock_sentiment", "downloads": 102, "source": "Hugging Face", "score": -0.05138084390236414, "first_commit": "2023-09-22 16:19:31", "latest_commit": "2023-09-24 06:06:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Sentiment Analysis" ] }, { "description": "About static quants of https://huggingface.co/TFMC/Japanese-Starling-ChatV-7B weighted/imatrix quants are available at https://huggingface.co/mradermacher/Japanese-Starling-ChatV-7B-i1-GGUF Usage If you are unsure how to use GGUF files, refer to one of TheBloke's READMEs for more details, including on how to concatenate multi-part files.", "url": "https://huggingface.co/mradermacher/Japanese-Starling-ChatV-7B-GGUF", "project_name": "Japanese-Starling-ChatV-7B-GGUF", 
"downloads": 101, "source": "Hugging Face", "score": -0.05138794302805865, "first_commit": "2025-01-24 06:02:19", "latest_commit": "2025-01-24 06:16:30", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "DataPilot/ArrowMint-Gemma3-4B-YUKI-v0.1 このモデルは、Googleのgoogle/gemma-3-4b-itをベースモデルとしています。 ", "url": "https://huggingface.co/DataPilot/ArrowMint-Gemma3-4B-YUKI-v0.1", "project_name": "ArrowMint-Gemma3-4B-YUKI-v0.1", "downloads": 101, "source": "Hugging Face", "score": -0.05138794302805865, "first_commit": "2025-03-29 16:18:44", "latest_commit": "2025-03-31 07:44:39", "languages": [], "model_or_dataset": "model", "model_size": 4.3, "model_architectures": "Gemma3ForConditionalGeneration", "multi_labels": [] }, { "description": "llm-jp-3-172b-instruct3", "url": "https://huggingface.co/llm-jp/llm-jp-3-172b-instruct3", "project_name": "llm-jp-3-172b-instruct3", "downloads": 101, "source": "Hugging Face", "score": -0.05138794302805865, "first_commit": "2024-12-26 07:36:26", "latest_commit": "2025-01-20 19:25:14", "languages": [], "model_or_dataset": "model", "model_size": 172.0, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "JSNLI Version 1.1 のデータセットのうち、フィルタリング後の訓練セット (train_w_filtering)", "url": "https://huggingface.co/datasets/llm-book/jsnli", "project_name": "jsnli", "downloads": 101, "source": "Hugging Face", "score": -0.05138794302805865, "first_commit": "2023-06-19 12:31:46", "latest_commit": "2023-10-25 15:22:46", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "c4ai-command-r-v01-japanese-instruct-GGUF 概要 Aratako/c4ai-command-r-v01-japanese-instructの量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/c4ai-command-r-v01-japanese-instruct-GGUF", "project_name": "c4ai-command-r-v01-japanese-instruct-GGUF", "downloads": 100, "source": "Hugging Face", "score": -0.051395042153753165, "first_commit": "2024-04-05 17:10:51", "latest_commit": "2024-04-07 03:19:34", "languages": [], "model_or_dataset": "model", "model_size": 35.0, "model_architectures": null, "multi_labels": [] }, { "description": "Billingual text is stored in text format.", "url": "https://huggingface.co/datasets/Nexdata/English-Japanese_Parallel_Corpus_Data", "project_name": "English-Japanese_Parallel_Corpus_Data", "downloads": 100, "source": "Hugging Face", "score": -0.051395042153753165, "first_commit": "2023-11-08 10:47:40", "latest_commit": "2024-08-05 03:14:27", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "このモデルはluke-japanese-largeをファインチューニングして、JCommonsenseQA(選択式応答)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/luke-large-commonsenseqa-japanese", "project_name": "luke-large-commonsenseqa-japanese", "downloads": 99, "source": "Hugging Face", "score": -0.05140214127944767, "first_commit": "2023-02-05 16:17:54", "latest_commit": "2023-02-05 17:04:47", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeForMultipleChoice", "multi_labels": [ "Reasoning", "Language Models", 
"Commonsense Reasoning", "Semantic Text Processing" ] }, { "description": "Ninja-v1-RP-expressive-GGUF 概要 Aratako/Ninja-v1-RP-expressiveの量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/Ninja-v1-RP-expressive-GGUF", "project_name": "Ninja-v1-RP-expressive-GGUF", "downloads": 99, "source": "Hugging Face", "score": -0.05140214127944767, "first_commit": "2024-05-21 12:16:42", "latest_commit": "2024-05-24 15:11:25", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Data extracted from CommonCrawlPDF Japanese domain Code is here", "url": "https://huggingface.co/datasets/hatakeyama-llm-team/CommonCrawlPDFJa", "project_name": "CommonCrawlPDFJa", "downloads": 99, "source": "Hugging Face", "score": -0.05140214127944767, "first_commit": "2024-04-09 06:40:01", "latest_commit": "2024-05-28 12:03:17", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "Development Process question dataset from hotchpotch/japanese-qa-reasoning-100k", "url": "https://huggingface.co/datasets/jaeyong2/ja-reasoning", "project_name": "ja-reasoning", "downloads": 99, "source": "Hugging Face", "score": -0.05140214127944767, "first_commit": "2025-03-31 04:18:34", "latest_commit": "2025-04-23 06:21:29", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Reasoning", "Natural Language Interfaces", "Question Answering", "Annotation and Dataset Development" ] }, { "description": "Shisa V2 Shisa V2 is a family of bilingual Japanese and English (JA/EN)", "url": "https://huggingface.co/shisa-ai/shisa-v2-mistral-nemo-12b", "project_name": "shisa-v2-mistral-nemo-12b", "downloads": 98, "source": "Hugging Face", "score": -0.051409240405142186, "first_commit": "2025-04-12 17:45:09", "latest_commit": "2025-04-16 13:27:15", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Multilinguality", "Language Models" ] }, { "description": "このモデルはluke-japanese-largeをファインチューニングして、固有表現抽出(NER)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-large-finetuned-ner", "project_name": "luke-japanese-large-finetuned-ner", "downloads": 98, "source": "Hugging Face", "score": -0.051409240405142186, "first_commit": "2023-05-11 12:00:20", "latest_commit": "2023-05-11 13:02:50", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Named Entity Recognition" ] }, { "description": "EZO-QwQ-32B-Preview This is a merge of pre-trained language models created using mergekit. 
", "url": "https://huggingface.co/nitky/EZO-QwQ-32B-Preview", "project_name": "EZO-QwQ-32B-Preview", "downloads": 98, "source": "Hugging Face", "score": -0.051409240405142186, "first_commit": "2024-12-01 11:01:31", "latest_commit": "2024-12-02 14:05:29", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "oasst2-33k-ja This repository provides an instruction tuning dataset developed by LLM-jp, a collaborative project launched in Japan.", "url": "https://huggingface.co/datasets/llm-jp/oasst2-33k-ja", "project_name": "oasst2-33k-ja", "downloads": 98, "source": "Hugging Face", "score": -0.051409240405142186, "first_commit": "2024-04-28 16:24:00", "latest_commit": "2024-04-28 16:39:03", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Annotation and Dataset Development" ] }, { "description": "LLM-jp-3 VILA 14B", "url": "https://huggingface.co/llm-jp/llm-jp-3-vila-14b", "project_name": "llm-jp-3-vila-14b", "downloads": 97, "source": "Hugging Face", "score": -0.0514163395308367, "first_commit": "2024-10-26 07:48:03", "latest_commit": "2024-11-18 08:29:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlavaLlamaModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "This model was created by merging intfloat/e5-mistral-7b-instruct and stabilityai/japanese-stablelm-base-gamma-7b.", "url": "https://huggingface.co/oshizo/japanese-e5-mistral-7b_slerp", "project_name": "japanese-e5-mistral-7b_slerp", "downloads": 96, "source": "Hugging Face", "score": -0.051423438656531206, "first_commit": "2024-01-04 12:33:19", "latest_commit": "2024-01-05 15:48:24", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralModel", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Original Model Optical character recognition for Japanese text, with the main focus being Japanese manga.", "url": "https://huggingface.co/TareHimself/manga-ocr-base", "project_name": "manga-ocr-base", "downloads": 96, "source": "Hugging Face", "score": -0.051423438656531206, "first_commit": "2023-09-14 04:15:52", "latest_commit": "2024-06-03 05:10:11", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "VisionEncoderDecoderModel", "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "マルチリンガルデータセットです。", "url": "https://huggingface.co/datasets/Sakalti/Multilingal-sakalt-data", "project_name": "Multilingal-sakalt-data", "downloads": 96, "source": "Hugging Face", "score": -0.051423438656531206, "first_commit": "2024-10-14 05:11:16", "latest_commit": "2024-10-17 10:41:45", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "What’s this?", "url": "https://huggingface.co/globis-university/deberta-v3-japanese-xsmall", "project_name": "deberta-v3-japanese-xsmall", "downloads": 95, "source": "Hugging Face", "score": -0.05143053778222572, "first_commit": "2023-09-21 16:12:53", "latest_commit": "2024-07-05 05:48:15", "languages": [], 
"model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing", "Morphology" ] }, { "description": "オリジナルのサイトと同じものを使用しています。 ", "url": "https://huggingface.co/datasets/llm-book/livedoor-news-corpus", "project_name": "livedoor-news-corpus", "downloads": 95, "source": "Hugging Face", "score": -0.05143053778222572, "first_commit": "2023-06-21 07:16:52", "latest_commit": "2023-12-12 11:19:43", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Named Entity Recognition", "Annotation and Dataset Development" ] }, { "description": "モデル概要 AWSのtrn1インスタンスを用いて開発した大喜利言語モデルです。", "url": "https://huggingface.co/watashiha/watashiha-gpt-6b", "project_name": "watashiha-gpt-6b", "downloads": 94, "source": "Hugging Face", "score": -0.051437636907920234, "first_commit": "2023-12-28 05:41:38", "latest_commit": "2024-03-04 05:21:14", "languages": [], "model_or_dataset": "model", "model_size": 5.83, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "nlp-waseda/roberta-large-japanese Model description This is a Japanese RoBERTa large model pretrained on Japanese Wikipedia and the Japanese portion of CC-100.", "url": "https://huggingface.co/nlp-waseda/roberta-large-japanese", "project_name": "roberta-large-japanese", "downloads": 94, "source": "Hugging Face", "score": -0.051437636907920234, "first_commit": "2022-05-10 08:37:48", "latest_commit": "2022-10-21 14:48:46", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Preface Small parameter LLMs are ideal for navigating the complexities of the Japanese language, which involves multiple character systems like kanji, hiragana, and katakana, along with subtle social cues.", "url": "https://huggingface.co/AELLM/Llama-3.2-Chibi-3B", "project_name": "Llama-3.2-Chibi-3B", "downloads": 94, "source": "Hugging Face", "score": -0.051437636907920234, "first_commit": "2024-10-14 14:39:54", "latest_commit": "2024-10-15 17:15:54", "languages": [], "model_or_dataset": "model", "model_size": 3.21, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "please see dahara1/Qwen2.5-3B-Instruct-gguf-japanese-imatrix-128K", "url": "https://huggingface.co/dahara1/Qwen2.5-7B-Instruct-gguf-japanese-imatrix-128K", "project_name": "Qwen2.5-7B-Instruct-gguf-japanese-imatrix-128K", "downloads": 94, "source": "Hugging Face", "score": -0.051437636907920234, "first_commit": "2024-11-16 08:00:16", "latest_commit": "2024-11-19 05:08:07", "languages": [], "model_or_dataset": "model", "model_size": 7.62, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Ruri-Reranker: Japanese General Reranker Usage Direct Usage (Sentence Transformers)", "url": "https://huggingface.co/cl-nagoya/ruri-reranker-small", "project_name": "ruri-reranker-small", "downloads": 94, "source": "Hugging Face", "score": -0.051437636907920234, "first_commit": "2024-08-19 12:39:07", "latest_commit": "2024-09-04 08:50:32", "languages": [], 
"model_or_dataset": "model", "model_size": 0.06870000000000001, "model_architectures": "DistilBertForSequenceClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Danbooru2023:", "url": "https://huggingface.co/datasets/jpft/danbooru2023", "project_name": "danbooru2023", "downloads": 94, "source": "Hugging Face", "score": -0.051437636907920234, "first_commit": "2024-01-11 10:28:25", "latest_commit": "2024-01-11 10:28:25", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Tagging" ] }, { "description": "[github].", "url": "https://huggingface.co/datasets/fujiki/japanese_alpaca_data", "project_name": "japanese_alpaca_data", "downloads": 94, "source": "Hugging Face", "score": -0.051437636907920234, "first_commit": "2023-05-18 07:13:15", "latest_commit": "2023-05-19 12:54:13", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "Magpie-Tanuki-8B-annotated-96k Magpieの手法をweblab-GENIAC/Tanuki-8B-dpo-v1.0に対して適用し作成したデータセットであるAratako/Magpie-Tanuki-8B-97kに対して、cyberagent/calm3-22b-chatを用いてinstructionに対して難易度、クオリティ、カテゴリをアノテーションしたデータセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Magpie-Tanuki-8B-annotated-96k", "project_name": "Magpie-Tanuki-8B-annotated-96k", "downloads": 94, "source": "Hugging Face", "score": -0.051437636907920234, "first_commit": "2024-10-23 06:23:00", "latest_commit": "2024-10-24 14:48:34", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Tagging" ] }, { "description": "Stockmark-2-100B-Instruct-beta Model description Stockmark-2-100B-Instruct-beta is a 100-billion-parameter large language model built from scratch, with a particular focus on Japanese.", "url": "https://huggingface.co/stockmark/Stockmark-2-100B-Instruct-beta", "project_name": "Stockmark-2-100B-Instruct-beta", "downloads": 93, "source": "Hugging Face", "score": -0.05144473603361474, "first_commit": "2025-03-05 06:35:38", "latest_commit": "2025-03-06 02:58:59", "languages": [], "model_or_dataset": "model", "model_size": 96.0, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "GPT-2 small Japanese model This repository contains a GPT2-small model trained on Japanese Wikipedia dataset.", "url": "https://huggingface.co/colorfulscoop/gpt2-small-ja", "project_name": "gpt2-small-ja", "downloads": 93, "source": "Hugging Face", "score": -0.05144473603361474, "first_commit": "2021-03-27 02:27:05", "latest_commit": "2021-09-27 20:50:17", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Ninja-v1-RP-GGUF 概要 Aratako/Ninja-v1-RPの量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/Ninja-v1-RP-GGUF", "project_name": "Ninja-v1-RP-GGUF", "downloads": 93, "source": "Hugging Face", "score": -0.05144473603361474, "first_commit": "2024-05-20 17:08:50", "latest_commit": "2024-05-24 15:11:08", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ 
"Annotation and Dataset Development" ] }, { "description": "DeepSeek-R1-Distill-Qwen-14B-Japanese GGUF Model Description", "url": "https://huggingface.co/aplulu/cyberagent-DeepSeek-R1-Distill-Qwen-14B-Japanese-GGUF", "project_name": "cyberagent-DeepSeek-R1-Distill-Qwen-14B-Japanese-GGUF", "downloads": 92, "source": "Hugging Face", "score": -0.051451835159309255, "first_commit": "2025-01-31 09:53:18", "latest_commit": "2025-01-31 09:53:18", "languages": [], "model_or_dataset": "model", "model_size": 14.8, "model_architectures": null, "multi_labels": [ "Representation Learning", "Language Models", "Green & Sustainable NLP", "Semantic Text Processing" ] }, { "description": "Wav2Vec2-Large-XLSR-53-Japanese Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the Common Voice and Japanese speech corpus of Saruwatari-lab, University of Tokyo JSUT.", "url": "https://huggingface.co/vumichien/wav2vec2-large-xlsr-japanese-hiragana", "project_name": "wav2vec2-large-xlsr-japanese-hiragana", "downloads": 92, "source": "Hugging Face", "score": -0.051451835159309255, "first_commit": "2021-06-18 07:15:24", "latest_commit": "2023-02-08 00:36:47", "languages": [], "model_or_dataset": "model", "model_size": 0.316, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ "Representation Learning", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "Model Card For llm-jp-3-1.8b-instruct-gguf LLM-jpさんのllm-jp-3-1.8b-instructを量子化したものたちです。 ", "url": "https://huggingface.co/alfredplpl/llm-jp-3-1.8b-instruct-gguf", "project_name": "llm-jp-3-1.8b-instruct-gguf", "downloads": 92, "source": "Hugging Face", "score": -0.051451835159309255, "first_commit": "2024-09-28 02:10:35", "latest_commit": "2024-10-03 10:01:32", "languages": [], "model_or_dataset": "model", "model_size": 1.87, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "Magpie-Tanuki-Qwen2.5-72B-Answered Aratako/Magpie-Tanuki-8B-annotated-96kからinput_qualityがexcellentのものを抽出し、それに対してQwen/Qwen2.5-72B-Instructで回答の再生成を行ったデータセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Magpie-Tanuki-Qwen2.5-72B-Answered", "project_name": "Magpie-Tanuki-Qwen2.5-72B-Answered", "downloads": 92, "source": "Hugging Face", "score": -0.051451835159309255, "first_commit": "2024-11-21 14:38:55", "latest_commit": "2024-11-25 16:48:52", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "Gemma-Mling: Multilingual Gemma Update @ 2024.04.15: First release of Gemma-Mling 7B model Original Gemma Model Page:", "url": "https://huggingface.co/beomi/gemma-mling-7b", "project_name": "gemma-mling-7b", "downloads": 91, "source": "Hugging Face", "score": -0.05145893428500376, "first_commit": "2024-04-15 05:37:05", "latest_commit": "2024-04-18 14:28:20", "languages": [], "model_or_dataset": "model", "model_size": 8.54, "model_architectures": "GemmaForCausalLM", "multi_labels": [ "Multilinguality" ] }, { "description": "Llama-3-8B-Instruct-JP-nk2t-v0.2 Model Details: Built with Meta Llama 3", "url": "https://huggingface.co/nk2t/Llama-3-8B-Instruct-japanese-nk2t-v0.2", "project_name": "Llama-3-8B-Instruct-japanese-nk2t-v0.2", "downloads": 91, "source": "Hugging Face", "score": -0.05145893428500376, "first_commit": "2024-05-04 04:16:35", "latest_commit": "2024-05-15 12:56:34", "languages": [], "model_or_dataset": "model", 
"model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "医師国家試験データセット(NMLE datasets) はじめに 日本の医師国家試験のデータセット 第110回 - 第117回までのデータ 用途 用途として モデルの評価 進化的モデルマージのタスクにつかう(「New Task Guide」参照) RAGなどに用いる情報源 医師国家試験の俯瞰 を想定しています 構造 data = { \"id\": question_id, \"question\": question_text, \"choices\": choices, \"answer\": answers, \"explanation\": explanation } 一部、構造化データにできなかった問題(画像がメインの出題など)が抜けています ライセンス ライセンスはCC-BY-NC-ND4.0で、商用利用禁止となっています 改変、改善、その他ご相談については X: @longislandtea3 までお願いします 免責事項 このデータセットの使用により生じた直接的、間接的、特別な損害、またはその他の損害について、一切の責任を負いません。", "url": "https://huggingface.co/datasets/longisland3/NMLE", "project_name": "NMLE", "downloads": 91, "source": "Hugging Face", "score": -0.05145893428500376, "first_commit": "2024-06-13 08:30:18", "latest_commit": "2024-07-01 16:15:33", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Explainability & Interpretability in NLP", "Annotation and Dataset Development" ] }, { "description": "日本語ByT5事前学習済みモデル This is a ByT5 (a tokenizer-free extension of the Text-to-Text Transfer Transformer) model pretrained on Japanese corpus. ", "url": "https://huggingface.co/sonoisa/byt5-small-japanese", "project_name": "byt5-small-japanese", "downloads": 90, "source": "Hugging Face", "score": -0.051466033410698275, "first_commit": "2021-06-04 13:14:22", "latest_commit": "2021-09-23 18:29:53", "languages": [], "model_or_dataset": "model", "model_size": 0.3, "model_architectures": "MT5ForConditionGeneration", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "In-progess long-context Japanese-English translation model based on tinyllama.", "url": "https://huggingface.co/NilanE/tinyllama-en_ja-translation-v2", "project_name": "tinyllama-en_ja-translation-v2", "downloads": 90, "source": "Hugging Face", "score": -0.051466033410698275, "first_commit": "2024-03-06 16:45:44", "latest_commit": "2024-03-28 16:36:13", "languages": [], "model_or_dataset": "model", "model_size": 1.1, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation" ] }, { "description": "GitHub リポジトリ cl-tohoku/quiz-datasets で公開されているデータセットを利用しています。 ", "url": "https://huggingface.co/datasets/llm-book/aio-retriever", "project_name": "aio-retriever", "downloads": 90, "source": "Hugging Face", "score": -0.051466033410698275, "first_commit": "2023-07-04 04:53:47", "latest_commit": "2023-10-25 15:31:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Retrieval", "Annotation and Dataset Development" ] }, { "description": "whisper-large-v3-japanese-4k-steps This model is a fine-tuned version of openai/whisper-large-v3 on the Common Voice 16.1 dataset.", "url": "https://huggingface.co/drewschaub/whisper-large-v3-japanese-4k-steps", "project_name": "whisper-large-v3-japanese-4k-steps", "downloads": 89, "source": "Hugging Face", "score": -0.05147313253639279, "first_commit": "2024-02-17 01:01:51", "latest_commit": "2024-02-18 01:31:35", "languages": [], "model_or_dataset": "model", "model_size": 1.54, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [ "Speech & Audio in NLP", "Multimodality" ] }, 
{ "description": "ku-nlp/roberta-large-japanese-char-wwm Model description This is a Japanese RoBERTa large model pre-trained on Japanese Wikipedia and the Japanese portion of CC-100.", "url": "https://huggingface.co/ku-nlp/roberta-large-japanese-char-wwm", "project_name": "roberta-large-japanese-char-wwm", "downloads": 89, "source": "Hugging Face", "score": -0.05147313253639279, "first_commit": "2022-09-18 08:10:44", "latest_commit": "2023-03-19 01:58:12", "languages": [], "model_or_dataset": "model", "model_size": 0.323, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese-Starling-ChatV-7B このモデルは\"chatntq-ja-7b-v1.0\"をベースにした7Bパラメータの日本語チャットモデルです。", "url": "https://huggingface.co/TFMC/Japanese-Starling-ChatV-7B", "project_name": "Japanese-Starling-ChatV-7B", "downloads": 89, "source": "Hugging Face", "score": -0.05147313253639279, "first_commit": "2024-04-14 12:18:31", "latest_commit": "2024-04-14 15:26:06", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "A slightly modified version of the parsing and chunking method for singletongue/wikipedia-utils.", "url": "https://huggingface.co/datasets/oshizo/japanese-wikipedia-paragraphs", "project_name": "japanese-wikipedia-paragraphs", "downloads": 89, "source": "Hugging Face", "score": -0.05147313253639279, "first_commit": "2023-12-09 11:14:53", "latest_commit": "2023-12-09 14:09:30", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Chunking", "Annotation and Dataset Development" ] }, { "description": "SakuraLLM Sakura: SFT And RLHF models using Knowledge of Universal Character and Relationship Attributes for Japanese to Chinese Translation in Light Novel & Galgame Domain.", "url": "https://huggingface.co/mav23/Sakura-13B-Galgame-GGUF", "project_name": "Sakura-13B-Galgame-GGUF", "downloads": 88, "source": "Hugging Face", "score": -0.051480231662087296, "first_commit": "2024-11-29 04:07:35", "latest_commit": "2024-11-29 05:48:09", "languages": [], "model_or_dataset": "model", "model_size": 13.9, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Feedback and support: TensorBlock's Twitter/X, Telegram Group and Discord server cyberagent/Mistral-Nemo-Japanese-Instruct-2408 - GGUF", "url": "https://huggingface.co/tensorblock/Mistral-Nemo-Japanese-Instruct-2408-GGUF", "project_name": "Mistral-Nemo-Japanese-Instruct-2408-GGUF", "downloads": 87, "source": "Hugging Face", "score": -0.05148733078778181, "first_commit": "2024-11-28 13:48:02", "latest_commit": "2024-11-28 14:52:02", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "llm-jp-1.3b-v1.0-aya llm-jp's llm-jp-1.3b-v1.0 model fine-tuned on the Japanese examples from Cohere's aya dataset Model llm-jp-eval AVG kcoopermiller/llm-jp-1.3b-v1.0-aya 0.0698 llm-jp/llm-jp-1.3b-v1.0 0.047 How to use import torch from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained(\"kcoopermiller/llm-jp-1.3b-v1.0-aya\")", "url": 
"https://huggingface.co/kcoopermiller/llm-jp-1.3b-v1.0-aya", "project_name": "llm-jp-1.3b-v1.0-aya", "downloads": 86, "source": "Hugging Face", "score": -0.051494429913476324, "first_commit": "2024-02-23 05:39:39", "latest_commit": "2024-02-29 23:48:58", "languages": [], "model_or_dataset": "model", "model_size": 1.32, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Open-Platypus-Japanese-masked-formatted weblab-GENIAC/Open-Platypus-Japanese-maskedをOpenAI messages形式に変換したデータセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Open-Platypus-Japanese-masked-formatted", "project_name": "Open-Platypus-Japanese-masked-formatted", "downloads": 86, "source": "Hugging Face", "score": -0.051494429913476324, "first_commit": "2024-11-22 03:08:54", "latest_commit": "2024-11-25 16:54:55", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "zenz-v1 Checkpoints zenz-v1 is a language model specialized for kana-kanji conversion tasks based on the GPT-2 architecture.", "url": "https://huggingface.co/Miwa-Keita/zenz-v1-checkpoints", "project_name": "zenz-v1-checkpoints", "downloads": 85, "source": "Hugging Face", "score": -0.05150152903917083, "first_commit": "2024-06-28 14:26:33", "latest_commit": "2024-06-28 14:53:43", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "RakutenAI-7B-gguf Rakutenさんが公開しているRakutenAI-7Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/RakutenAI-7B-gguf", "project_name": "RakutenAI-7B-gguf", "downloads": 85, "source": "Hugging Face", "score": -0.05150152903917083, "first_commit": "2024-03-21 11:29:22", "latest_commit": "2024-03-21 12:48:26", "languages": [], "model_or_dataset": "model", "model_size": 7.37, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "MashiroSA/sovits-emu-dataset A voice dataset collected from Project Sekai charactor Emu Otori Introduction Size: 2735, all WAV format.", "url": "https://huggingface.co/datasets/chitsanfei/pjsk-emu-dataset", "project_name": "pjsk-emu-dataset", "downloads": 85, "source": "Hugging Face", "score": -0.05150152903917083, "first_commit": "2023-04-07 00:48:45", "latest_commit": "2024-07-16 23:02:04", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Dataset overview This dataset identifies whether a GitHub repository description pertains to Japanese natural language processing (NLP).", "url": "https://huggingface.co/datasets/taishi-i/awesome-japanese-nlp-classification-dataset", "project_name": "awesome-japanese-nlp-classification-dataset", "downloads": 85, "source": "Hugging Face", "score": -0.05150152903917083, "first_commit": "2023-09-09 06:37:36", "latest_commit": "2023-09-09 20:09:04", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "https://huggingface.co/SousiOmine/minoshiro-v0.2-7B のGGUF量子化版です。", "url": 
"https://huggingface.co/SousiOmine/minoshiro-v0.2-7B_GGUF", "project_name": "minoshiro-v0.2-7B_GGUF", "downloads": 84, "source": "Hugging Face", "score": -0.051508628164865344, "first_commit": "2025-01-05 03:28:18", "latest_commit": "2025-01-05 04:55:16", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Synthetic-Japanese-Roleplay-gpt-4o-mini-39.6k-formatted 20240907 データ増量(約19800件→約39600件) 概要 gpt-4o-miniを用いて作成した日本語ロールプレイデータセットであるAratako/Synthetic-Japanese-Roleplay-gpt-4o-mini-39.6kにsystem messageを追加して整形したデータセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Synthetic-Japanese-Roleplay-gpt-4o-mini-39.6k-formatted", "project_name": "Synthetic-Japanese-Roleplay-gpt-4o-mini-39.6k-formatted", "downloads": 84, "source": "Hugging Face", "score": -0.051508628164865344, "first_commit": "2024-08-16 16:46:06", "latest_commit": "2024-09-07 12:34:01", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Shisa V2 Shisa V2 is a family of bilingual Japanese and English (JA/EN)", "url": "https://huggingface.co/shisa-ai/shisa-v2-qwen2.5-32b", "project_name": "shisa-v2-qwen2.5-32b", "downloads": 83, "source": "Hugging Face", "score": -0.05151572729055985, "first_commit": "2025-04-13 13:11:07", "latest_commit": "2025-04-16 13:28:00", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Multilinguality", "Language Models" ] }, { "description": "このモデルはdeberta-v2-base-japaneseをファインチューニングしてQAタスクに用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/deberta-v2-base-japanese-finetuned-QAe", "project_name": "deberta-v2-base-japanese-finetuned-QAe", "downloads": 83, "source": "Hugging Face", "score": -0.05151572729055985, "first_commit": "2023-01-09 11:59:13", "latest_commit": "2023-03-27 02:43:35", "languages": [], "model_or_dataset": "model", "model_size": 0.112, "model_architectures": "DebertaV2ForQuestionAnswering", "multi_labels": [ "Natural Language Interfaces", "Question Answering", "Language Models", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "4-bit 量子化版 llm-jp-3-172b-instruct3 本リポジトリでは、大学共同利用機関法人情報・システム研究機構 国立情報学研究所(以下「NII」)が提供する「llm-jp-3-172b-instruct3」(以下「本モデル」) を 4-bit 量子化した派生モデル (以下「本量子化モデル」) を公開しています。 ", "url": "https://huggingface.co/DeL-TaiseiOzaki/llm-jp-3-172b-instruct3-4bit", "project_name": "llm-jp-3-172b-instruct3-4bit", "downloads": 83, "source": "Hugging Face", "score": -0.05151572729055985, "first_commit": "2024-12-25 06:42:49", "latest_commit": "2024-12-25 08:20:53", "languages": [], "model_or_dataset": "model", "model_size": 90.1, "model_architectures": "LlamaForCausalLM", "multi_labels": [] }, { "description": "Dataset details: Each entry in this dataset is a sentence-aligned Japanese web novel chapter and English fan translation.", "url": "https://huggingface.co/datasets/NilanE/ParallelFiction-Ja_En-100k", "project_name": "ParallelFiction-Ja_En-100k", "downloads": 83, "source": "Hugging Face", "score": -0.05151572729055985, "first_commit": "2024-03-25 23:41:17", "latest_commit": "2024-06-02 18:03:38", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, 
"multi_labels": [ "Multilinguality", "Information Extraction & Text Mining", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "Tanuki-8x8B-dpo-v1.0 モデルについて Tanuki-8x8Bは、フルスクラッチで約1.7Tトークン事前学習を行った8x8Bパラメータ(総パラメータ約47B、アクティブパラメータ約13B)の大規模言語モデルです。", "url": "https://huggingface.co/weblab-GENIAC/Tanuki-8x8B-dpo-v1.0", "project_name": "Tanuki-8x8B-dpo-v1.0", "downloads": 82, "source": "Hugging Face", "score": -0.051522826416254365, "first_commit": "2024-08-12 12:47:11", "latest_commit": "2024-09-02 23:47:09", "languages": [], "model_or_dataset": "model", "model_size": 47.0, "model_architectures": "TanukiForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "alabnii/jmedroberta-base-manbyo-wordpiece-vocab50000 Model description This is a Japanese RoBERTa base model pre-trained on academic articles in medical sciences collected by Japan Science and Technology Agency (JST).", "url": "https://huggingface.co/alabnii/jmedroberta-base-manbyo-wordpiece-vocab50000", "project_name": "jmedroberta-base-manbyo-wordpiece-vocab50000", "downloads": 82, "source": "Hugging Face", "score": -0.051522826416254365, "first_commit": "2022-12-22 17:19:15", "latest_commit": "2023-03-08 01:47:12", "languages": [], "model_or_dataset": "model", "model_size": 0.124, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Manga OCR Optical character recognition for Japanese text, with the main focus being Japanese manga.", "url": "https://huggingface.co/mathewthe2/manga-ocr-base", "project_name": "manga-ocr-base", "downloads": 82, "source": "Hugging Face", "score": -0.051522826416254365, "first_commit": "2024-06-01 02:20:15", "latest_commit": "2024-06-01 15:14:24", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "VisionEncoderDecoderModel", "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "Deepreneur-blue-lizard Model Description Deepreneur-blue-lizardは、MetaのLlama-2-7bに対して、Wikipediaや書籍等の日本語の学習データを用いて追加事前学習と独自データによるファインチューニングを実施したモデルです。", "url": "https://huggingface.co/Deepreneur/blue-lizard", "project_name": "blue-lizard", "downloads": 82, "source": "Hugging Face", "score": -0.051522826416254365, "first_commit": "2024-02-05 16:29:48", "latest_commit": "2024-02-12 14:43:33", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Dialogue Response Generation", "Dialogue Systems & Conversational Agents", "Language Models", "Semantic Text Processing" ] }, { "description": "Tanuki-8x8B-dpo-v1.0-AWQ 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8x8B-dpo-v1.0のAWQ 4bit量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-AWQ", "project_name": "Tanuki-8x8B-dpo-v1.0-AWQ", "downloads": 82, "source": "Hugging Face", "score": -0.051522826416254365, "first_commit": "2024-08-27 09:31:22", "latest_commit": "2024-09-03 09:26:20", "languages": [], "model_or_dataset": "model", "model_size": 6.75, "model_architectures": "TanukiForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Shisa V2 Shisa V2 is a family of bilingual Japanese and English (JA/EN)", "url": "https://huggingface.co/shisa-ai/shisa-v2-unphi4-14b", "project_name": "shisa-v2-unphi4-14b", "downloads": 81, "source": 
"Hugging Face", "score": -0.05152992554194888, "first_commit": "2025-04-12 13:47:30", "latest_commit": "2025-04-16 13:27:38", "languages": [], "model_or_dataset": "model", "model_size": 14.7, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Multilinguality", "Language Models" ] }, { "description": "Ninja-v1-RP-expressive-GGUF 概要 Aratako/Ninja-v1-RP-expressive-v2の量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/Ninja-v1-RP-expressive-v2-GGUF", "project_name": "Ninja-v1-RP-expressive-v2-GGUF", "downloads": 81, "source": "Hugging Face", "score": -0.05152992554194888, "first_commit": "2024-05-26 06:09:57", "latest_commit": "2024-05-26 15:22:01", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "日本語WikipediaからLLMを用いて自動生成した質問と、対応する日本語Wikipediaのページを元に、cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japaneseを用いて回答を生成したデータセットです。 ", "url": "https://huggingface.co/datasets/hpprc/r1-distill-qwen-pseudo-qa", "project_name": "r1-distill-qwen-pseudo-qa", "downloads": 81, "source": "Hugging Face", "score": -0.05152992554194888, "first_commit": "2025-02-01 11:07:57", "latest_commit": "2025-02-10 01:19:51", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "cc100-ja-documents HuggingFace で公開されている cc100 / cc100-ja は line 単位の分割のため、document 単位に結合したものです。 ", "url": "https://huggingface.co/datasets/hotchpotch/cc100-ja-documents", "project_name": "cc100-ja-documents", "downloads": 81, "source": "Hugging Face", "score": -0.05152992554194888, "first_commit": "2024-07-11 22:26:16", "latest_commit": "2024-07-13 12:02:11", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "magpie-reasoning-llama-nemotron-70b-100k-filtered DeL-TaiseiOzaki/magpie-reasoning-llama-nemotron-70b-100kから、refined_answer列に\"改良\"という文字が含まれていないものを抽出し、OpenAI messages形式に変換したデータセットです。 ", "url": "https://huggingface.co/datasets/Aratako/magpie-reasoning-llama-nemotron-70b-100k-filtered", "project_name": "magpie-reasoning-llama-nemotron-70b-100k-filtered", "downloads": 81, "source": "Hugging Face", "score": -0.05152992554194888, "first_commit": "2024-11-22 03:06:57", "latest_commit": "2024-11-25 16:53:15", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Reasoning" ] }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-beta-7B-AWQ", "project_name": "japanese-stablelm-instruct-beta-7B-AWQ", "downloads": 80, "source": "Hugging Face", "score": -0.051537024667643386, "first_commit": "2023-11-03 01:04:31", "latest_commit": "2023-11-09 18:16:12", "languages": [], "model_or_dataset": "model", "model_size": 1.13, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "DataPilot様の ArrowPro-7B-KUJIRA をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/ArrowPro-7B-KUJIRA-GGUF", "project_name": "ArrowPro-7B-KUJIRA-GGUF", "downloads": 80, "source": "Hugging Face", "score": -0.051537024667643386, "first_commit": "2024-05-09 13:34:05", "latest_commit": "2024-05-09 23:32:52", 
"languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese E5 Mixtral 7B Slerp GGUF GGUF conversion of oshizo/japanese-e5-mistral-7b_slerp Avaiable formats: Q2_K.gguf Q3_K.gguf Q4_K.gguf Q5_K.gguf", "url": "https://huggingface.co/mm/japanese-e5-mistral-7b_slerp_gguf", "project_name": "japanese-e5-mistral-7b_slerp_gguf", "downloads": 80, "source": "Hugging Face", "score": -0.051537024667643386, "first_commit": "2024-06-09 08:34:37", "latest_commit": "2024-06-14 16:12:17", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese version of MMLU dataset tranlasted by gpt-3.5-turbo.", "url": "https://huggingface.co/datasets/FreedomIntelligence/MMLU_Japanese", "project_name": "MMLU_Japanese", "downloads": 80, "source": "Hugging Face", "score": -0.051537024667643386, "first_commit": "2023-07-24 05:43:09", "latest_commit": "2023-08-06 08:06:24", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Language Models", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "Model Details Model Description", "url": "https://huggingface.co/MIL-UT/Asagi-14B", "project_name": "Asagi-14B", "downloads": 79, "source": "Hugging Face", "score": -0.0515441237933379, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 14.2, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "uniTKU-hubert-japanese-asr", "url": "https://huggingface.co/TKU410410103/uniTKU-hubert-japanese-asr", "project_name": "uniTKU-hubert-japanese-asr", "downloads": 79, "source": "Hugging Face", "score": -0.0515441237933379, "first_commit": "2024-04-20 14:59:52", "latest_commit": "2024-04-22 18:37:33", "languages": [], "model_or_dataset": "model", "model_size": 0.0945, "model_architectures": "HubertForCTC", "multi_labels": [ "Speech Recognition", "Text Generation", "Language Models", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "The data contains 101,702 entries.", "url": "https://huggingface.co/datasets/Nexdata/Japanese_Pronunciation_Dictionary", "project_name": "Japanese_Pronunciation_Dictionary", "downloads": 79, "source": "Hugging Face", "score": -0.0515441237933379, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "日本語向け Llama 3 8B はじめに このリポジトリはLlama 3を日本語化しようとしたモデルのリポジトリです。", "url": "https://huggingface.co/alfredplpl/Llama-3-8B-Instruct-Ja", "project_name": "Llama-3-8B-Instruct-Ja", "downloads": 78, "source": "Hugging Face", "score": -0.05155122291903241, "first_commit": "2024-04-22 05:14:33", "latest_commit": "2024-05-01 19:16:01", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "fio-base-japanese-v0.1 
日本語版は近日公開予定です(日本語を勉強中なので、間違いはご容赦ください!", "url": "https://huggingface.co/bclavie/fio-base-japanese-v0.1", "project_name": "fio-base-japanese-v0.1", "downloads": 78, "source": "Hugging Face", "score": -0.05155122291903241, "first_commit": "2023-12-18 11:01:07", "latest_commit": "2023-12-19 10:28:16", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertModel", "multi_labels": [ "Representation Learning", "Textual Inference", "Semantic Text Processing" ] }, { "description": "electra-base-cyberbullying This is a BERT Base model for the Japanese language finetuned for automatic cyberbullying detection.", "url": "https://huggingface.co/kit-nlp/bert-base-japanese-sentiment-cyberbullying", "project_name": "bert-base-japanese-sentiment-cyberbullying", "downloads": 77, "source": "Hugging Face", "score": -0.05155832204472692, "first_commit": "2022-09-09 02:16:34", "latest_commit": "2022-11-01 07:18:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Language Models", "Semantic Text Processing", "Sentiment Analysis" ] }, { "description": "fineweb-2-edu-japanese-scores fineweb-2日本語テキストの教育的スコアデータセット (0-4段階) 概要: このデータセットは、FineWeb-Edu classifier の手法に倣い、Deepseek API を用いて、大規模ウェブデータセット fineweb-2 日本語テキストの教育的視点をスコアリングしたものです。 ", "url": "https://huggingface.co/datasets/hotchpotch/fineweb-2-edu-japanese-scores", "project_name": "fineweb-2-edu-japanese-scores", "downloads": 77, "source": "Hugging Face", "score": -0.05155832204472692, "first_commit": "2025-02-10 00:04:59", "latest_commit": "2025-02-11 07:26:37", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Representation Learning", "Semantic Text Processing" ] }, { "description": "Japanese-RP-Bench-testdata-SFW 本データセットは、LLMの日本語ロールプレイ能力を計測するベンチマークJapanese-RP-Bench用の評価データセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Japanese-RP-Bench-testdata-SFW", "project_name": "Japanese-RP-Bench-testdata-SFW", "downloads": 76, "source": "Hugging Face", "score": -0.051565421170421434, "first_commit": "2024-09-22 06:25:01", "latest_commit": "2024-09-29 05:34:09", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "評価スコアの再現性確保と SB Intuitions 修正版の公開用クローン ソース: yahoojapan/JGLUE on GitHub JSQuAD JSQuAD is a Japanese version of SQuAD (Rajpurkar+, 2016), one of the datasets of reading comprehension.", "url": "https://huggingface.co/datasets/sbintuitions/JSQuAD", "project_name": "JSQuAD", "downloads": 76, "source": "Hugging Face", "score": -0.051565421170421434, "first_commit": "2024-07-30 05:20:25", "latest_commit": "2025-01-22 04:16:50", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Reasoning", "Natural Language Interfaces", "Machine Reading Comprehension", "Annotation and Dataset Development" ] }, { "description": "Moriyasu_Qwen2_JP_7B Model Description Moriyasu_Qwen2_JP_7B is a large language model trained by Moriyasu.", "url": "https://huggingface.co/AIJapanese/Moriyasu_Qwen2_JP_7B", "project_name": "Moriyasu_Qwen2_JP_7B", "downloads": 75, "source": "Hugging Face", "score": -0.05157252029611595, 
"first_commit": "2024-12-09 03:26:41", "latest_commit": "2024-12-11 10:17:17", "languages": [], "model_or_dataset": "model", "model_size": 7.62, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "CABank Japanese CallHome Corpus Participants: 120 Type of Study: phone call Location: United States Media type: audio DOI: doi:10.21415/T5H59V Web: https://ca.talkbank.org/access/CallHome/jpn.html Citation information Some citation here.", "url": "https://huggingface.co/datasets/Fhrozen/CABankSakuraCHJP", "project_name": "CABankSakuraCHJP", "downloads": 75, "source": "Hugging Face", "score": -0.05157252029611595, "first_commit": "2022-09-14 05:48:24", "latest_commit": "2022-12-03 03:26:43", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP", "Annotation and Dataset Development" ] }, { "description": "モデルの説明(English explanation is below.", "url": "https://huggingface.co/keitokei1994/Llama-3-ELYZA-sqlcoder-2x8B-GGUF", "project_name": "Llama-3-ELYZA-sqlcoder-2x8B-GGUF", "downloads": 74, "source": "Hugging Face", "score": -0.051579619421810455, "first_commit": "2024-06-28 01:51:50", "latest_commit": "2024-06-28 05:56:23", "languages": [], "model_or_dataset": "model", "model_size": 13.7, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "JaWiki WikipediaのHTML形式のダンプファイルから抽出したテキストデータセットです。 ", "url": "https://huggingface.co/datasets/hpprc/jawiki", "project_name": "jawiki", "downloads": 74, "source": "Hugging Face", "score": -0.051579619421810455, "first_commit": "2024-02-02 06:36:01", "latest_commit": "2024-02-13 15:19:49", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "Japanese Natural Language Inference Model", "url": "https://huggingface.co/cyberagent/xlm-roberta-large-jnli-jsick", "project_name": "xlm-roberta-large-jnli-jsick", "downloads": 73, "source": "Hugging Face", "score": -0.05158671854750497, "first_commit": "2022-12-23 10:51:12", "latest_commit": "2022-12-23 10:51:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "XLMRobertaForSequenceClassification", "multi_labels": [ "Reasoning", "Textual Inference", "Language Models", "Semantic Text Processing" ] }, { "description": "妖怪知識評価データセットの構築へ向けて」(NLP2025)のデータが含まれています。 ", "url": "https://huggingface.co/datasets/cyberagent/YokaiEval", "project_name": "YokaiEval", "downloads": 73, "source": "Hugging Face", "score": -0.05158671854750497, "first_commit": "2025-02-26 08:37:53", "latest_commit": "2025-03-18 04:56:26", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "出力になにかしらの制約があるシステムプロンプトに対する追従性を向上させるためのデータセットの試作です。", "url": "https://huggingface.co/datasets/SousiOmine/TagInstruct-JP", "project_name": "TagInstruct-JP", "downloads": 73, "source": "Hugging Face", "score": -0.05158671854750497, "first_commit": "2025-03-26 13:50:00", "latest_commit": "2025-03-27 01:03:43", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "概要 
チャットLLMにpython関数呼び出し機能を付与するための低品質なデータセットです。 ", "url": "https://huggingface.co/datasets/SousiOmine/Japanese-Pythonic-FunctionCall", "project_name": "Japanese-Pythonic-FunctionCall", "downloads": 73, "source": "Hugging Face", "score": -0.05158671854750497, "first_commit": "2025-03-10 04:02:43", "latest_commit": "2025-03-10 04:11:47", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "alpaca_jp_python alpaca_jp_pythonは、 Stanford Alpacaの手法 mistralai/Mixtral-8x22B-Instruct-v0.1 で作った合成データ(Synthetic data)です。", "url": "https://huggingface.co/datasets/HachiML/alpaca_jp_python", "project_name": "alpaca_jp_python", "downloads": 73, "source": "Hugging Face", "score": -0.05158671854750497, "first_commit": "2024-05-16 02:02:09", "latest_commit": "2024-05-20 01:44:32", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Dataset Summary This dataset is a Japanese-translated version of the OpenO1-SFT dataset, containing Chain of Thought (CoT) reasoning examples designed for fine-tuning language models.", "url": "https://huggingface.co/datasets/Inoichan/OpenO1-SFT-JA", "project_name": "OpenO1-SFT-JA", "downloads": 73, "source": "Hugging Face", "score": -0.05158671854750497, "first_commit": "2025-01-09 16:34:18", "latest_commit": "2025-01-09 16:53:56", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Reasoning", "Text Generation", "Machine Translation", "Language Models", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "You agree to the terms of the LICENSE when using this dataset. 
", "url": "https://huggingface.co/datasets/litagin/ehehe-corpus", "project_name": "ehehe-corpus", "downloads": 73, "source": "Hugging Face", "score": -0.05158671854750497, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "BERT large Japanese (character-level tokenization with whole word masking, CC-100 and jawiki-20230102)", "url": "https://huggingface.co/tohoku-nlp/bert-large-japanese-char-v2", "project_name": "bert-large-japanese-char-v2", "downloads": 71, "source": "Hugging Face", "score": -0.05160091679889399, "first_commit": "2023-05-19 00:48:06", "latest_commit": "2023-05-19 00:54:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForPreTraining", "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Ninja-v1-RP-WIP 概要 Local-Novel-LLM-project/Ninja-v1-NSFWをロールプレイ用にLoRAでファインチューニングしたモデルです。 ", "url": "https://huggingface.co/Aratako/Ninja-v1-RP-WIP", "project_name": "Ninja-v1-RP-WIP", "downloads": 71, "source": "Hugging Face", "score": -0.05160091679889399, "first_commit": "2024-05-19 15:31:02", "latest_commit": "2024-05-20 16:56:00", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Description This dataset is just a sample of 10341 Hours Unsupervised Spontaneous Japanese Speech Dataset(paid dataset), covers dialogues or monologues in 28 common domains, such as daily vlogs, travel, podcast, technology, beauty, etc.", "url": "https://huggingface.co/datasets/Nexdata/10341_Hours_Unsupervised_Spontaneous_Japanese_Speech_Data", "project_name": "10341_Hours_Unsupervised_Spontaneous_Japanese_Speech_Data", "downloads": 71, "source": "Hugging Face", "score": -0.05160091679889399, "first_commit": "2025-02-10 09:29:15", "latest_commit": "2025-02-11 09:28:14", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Low-Resource NLP" ] }, { "description": "Umamusume-voice-transcription Total charcters: 77 Comes with transcription. 
", "url": "https://huggingface.co/datasets/TLME/Umamusume-voice-transcription", "project_name": "Umamusume-voice-transcription", "downloads": 71, "source": "Hugging Face", "score": -0.05160091679889399, "first_commit": "2023-08-03 01:05:31", "latest_commit": "2023-08-04 05:14:01", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality" ] }, { "description": "suzume_mix_v1.0(flux.1 系マージモデル) 本モデルは、flux1-dev をベースに、複数のLoRA、モデルをブレンドしたマージモデルです。", "url": "https://huggingface.co/Kotajiro/suzume_mix", "project_name": "suzume_mix", "downloads": 70, "source": "Hugging Face", "score": -0.0516080159245885, "first_commit": "2025-04-12 03:00:31", "latest_commit": "2025-04-12 03:16:45", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "日本語Wikipediaから抽出したテキストに基づいて、rinna/deepseek-r1-distill-qwen2.5-bakeneko-32bとhttps://huggingface.co/cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japaneseを用いて箇条書きにしたデータセットです。 ", "url": "https://huggingface.co/datasets/hpprc/jawiki-bullet-points", "project_name": "jawiki-bullet-points", "downloads": 70, "source": "Hugging Face", "score": -0.0516080159245885, "first_commit": "2025-02-12 02:10:19", "latest_commit": "2025-02-17 07:36:28", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "This is a Japanese portion of the Guanaco dataset.", "url": "https://huggingface.co/datasets/fujiki/guanaco_ja", "project_name": "guanaco_ja", "downloads": 70, "source": "Hugging Face", "score": -0.0516080159245885, "first_commit": "2023-06-24 08:27:30", "latest_commit": "2023-07-16 15:01:30", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Annotation and Dataset Development" ] }, { "description": "Reference https://huggingface.co/datasets/mc4", "url": "https://huggingface.co/datasets/Atom007/mc4-japanese-data", "project_name": "mc4-japanese-data", "downloads": 69, "source": "Hugging Face", "score": -0.05161511505028301, "first_commit": "2023-07-09 14:56:56", "latest_commit": "2023-07-09 15:04:14", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "各レコードのurl列が出典となります。 ", "url": "https://huggingface.co/datasets/numad/yuho-text-2023", "project_name": "yuho-text-2023", "downloads": 69, "source": "Hugging Face", "score": -0.05161511505028301, "first_commit": "2024-06-14 07:39:01", "latest_commit": "2024-06-14 10:55:42", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "wav2vec2-base-asr", "url": "https://huggingface.co/TKU410410103/wav2vec2-base-japanese-asr", "project_name": "wav2vec2-base-japanese-asr", "downloads": 68, "source": "Hugging Face", "score": -0.051622214175977524, "first_commit": "2024-04-14 10:22:21", "latest_commit": "2024-04-14 14:00:30", "languages": [], "model_or_dataset": "model", "model_size": 0.0945, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ 
"Speech Recognition", "Representation Learning", "Text Generation", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "Local-Novel-LLM-project様の Vecteus-V2-7B をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Vecteus-V2-7B-GGUF", "project_name": "Vecteus-V2-7B-GGUF", "downloads": 68, "source": "Hugging Face", "score": -0.051622214175977524, "first_commit": "2024-06-16 05:26:00", "latest_commit": "2024-06-16 11:32:15", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "danbooru-ja-tag-pair-20241015 2024/10/15に作成したdanbooruタグと日本語タグのペアデータセット(約15万件) p1atdev/danbooru-ja-tag-pair-20240715 との違いは、 ベースのwikiデータが増えたのでその分対応タグも増えた fasttextでのフィルタリングを挟むようにした 「明らかに他言語のタグ」が混じる頻度はちょっと減った気がするけど、完全ではない (calm3くんの処理に)ミスがなければ、最低一つ以上の日本語タグ (other_names フィールド) が存在するはず 作成過程 isek-ai/danbooru-wiki-2024 の #202408-at20240906 revision を元に、 other_names (基本的にPixivのタグ)がついているものから、日本語じゃないもの・曖昧・意味の過不足が大きいタグを除去。", "url": "https://huggingface.co/datasets/p1atdev/danbooru-ja-tag-pair-20241015", "project_name": "danbooru-ja-tag-pair-20241015", "downloads": 68, "source": "Hugging Face", "score": -0.051622214175977524, "first_commit": "2024-10-15 14:35:08", "latest_commit": "2024-10-16 12:48:36", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Tagging", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "9.83 Million Pairs of Sentences - Chinese-Japanese Parallel Corpus Data be stored in txt format.", "url": "https://huggingface.co/datasets/Nexdata/Chinese-Japanese_Parallel_Corpus_Data", "project_name": "Chinese-Japanese_Parallel_Corpus_Data", "downloads": 68, "source": "Hugging Face", "score": -0.051622214175977524, "first_commit": "2023-11-08 11:00:00", "latest_commit": "2024-08-09 02:33:26", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "NLLB 1.3B fine-tuned on Japanese to English Light Novel translation This model was fine-tuned on light and web novel for Japanese to English translation.", "url": "https://huggingface.co/thefrigidliquidation/nllb-jaen-1.3B-lightnovels", "project_name": "nllb-jaen-1.3B-lightnovels", "downloads": 67, "source": "Hugging Face", "score": -0.05162931330167204, "first_commit": "2022-10-01 00:43:59", "latest_commit": "2023-06-04 13:38:43", "languages": [], "model_or_dataset": "model", "model_size": 1.37, "model_architectures": "M2M100ForConditionalGeneration", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models", "Semantic Text Processing" ] }, { "description": "Llama-3-8B-Instruct-JP-nk2t-v0.3 Model Details: Built with Meta Llama 3 llama-3-8bの日本語継続学習モデルにChatVectorを適用し、さらにQLoraでファインチューニングしたモデルです。 ", "url": "https://huggingface.co/nk2t/Llama-3-8B-Instruct-japanese-nk2t-v0.3", "project_name": "Llama-3-8B-Instruct-japanese-nk2t-v0.3", "downloads": 67, "source": "Hugging Face", "score": -0.05162931330167204, "first_commit": "2024-05-15 12:24:06", "latest_commit": "2024-05-22 11:02:28", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and 
Dataset Development" ] }, { "description": "固有表現ラベルはllm-book/ner-wikipedia-datasetと同様のものを採用しており、全部で8種類 (人名、法人名、地名、製品名、政治的組織名、施設名、その他の組織名、イベント名)あります。 ", "url": "https://huggingface.co/datasets/llm-book/ner-wikinews-dataset", "project_name": "ner-wikinews-dataset", "downloads": 67, "source": "Hugging Face", "score": -0.05162931330167204, "first_commit": "2023-04-22 14:32:21", "latest_commit": "2023-12-12 11:22:26", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Coreference Resolution", "Named Entity Recognition", "Annotation and Dataset Development" ] }, { "description": "EN/JA dataset used for shisa-7b-v1 - see details in that model's readme.", "url": "https://huggingface.co/datasets/augmxnt/ultra-orca-boros-en-ja-v1", "project_name": "ultra-orca-boros-en-ja-v1", "downloads": 67, "source": "Hugging Face", "score": -0.05162931330167204, "first_commit": "2023-12-06 12:55:59", "latest_commit": "2025-04-12 13:33:38", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF Model creator: MaziyarPanahi Original model: MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1 Description MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF contains GGUF format model files for MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1.", "url": "https://huggingface.co/MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF", "project_name": "japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF", "downloads": 66, "source": "Hugging Face", "score": -0.051636412427366545, "first_commit": "2024-01-28 16:13:26", "latest_commit": "2024-01-28 16:24:30", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "「LLM-jp-3 172B」利用規約 この利用規約(以下「本規約」といいます)は、大学共同利用機関法人 情報・システム研究機構 国立情報学研究所(以下「提供者」といいます)による開発の成果物として公開する大規模言語モデル「LLM-jp-3 172B」(以下「本プログラム」といいます)の利用に関する条件を定めるものです。", "url": "https://huggingface.co/llm-jp/llm-jp-3-172b", "project_name": "llm-jp-3-172b", "downloads": 66, "source": "Hugging Face", "score": -0.051636412427366545, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 172.0, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "japanese-sentiment-analysis This model is the work of jarvisx17 and was trained from scratch on the chABSA dataset.", "url": "https://huggingface.co/RPAmodels/PN-analysis", "project_name": "PN-analysis", "downloads": 66, "source": "Hugging Face", "score": -0.051636412427366545, "first_commit": "2022-11-15 06:28:39", "latest_commit": "2024-09-27 05:20:33", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "思考過程を含む、日本語質問・キーワード・回答・文章の合成データセット fineweb2-edu-japanese の文章データを元に、DeepSeek-R1 で文章(text)から質問文と回答部分の該当箇所を生成した日本語の質問と対応する文章・回答部のデータセットです。", "url": "https://huggingface.co/datasets/hotchpotch/japanese-qa-reasoning-100k", 
"project_name": "japanese-qa-reasoning-100k", "downloads": 66, "source": "Hugging Face", "score": -0.051636412427366545, "first_commit": "2025-03-10 00:10:59", "latest_commit": "2025-03-24 20:39:35", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Reasoning" ] }, { "description": "このモデルはluke-japanese-baseをファインチューニングして、MARC-ja(positive or negativeの二値分類)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-base-marcja", "project_name": "luke-japanese-base-marcja", "downloads": 65, "source": "Hugging Face", "score": -0.05164351155306106, "first_commit": "2023-03-02 03:57:33", "latest_commit": "2023-07-21 14:10:48", "languages": [], "model_or_dataset": "model", "model_size": 0.279, "model_architectures": "LukeForSequenceClassification", "multi_labels": [ "Language Models" ] }, { "description": "Model Card for Model ID", "url": "https://huggingface.co/Respair/Japanese_Phoneme_to_Grapheme_LLM", "project_name": "Japanese_Phoneme_to_Grapheme_LLM", "downloads": 65, "source": "Hugging Face", "score": -0.05164351155306106, "first_commit": "2024-09-06 23:01:09", "latest_commit": "2024-09-09 23:16:12", "languages": [], "model_or_dataset": "model", "model_size": 3.09, "model_architectures": "Qwen2Model", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "AIO with extended answers AIO (AI王) is a Japanese quiz dataset.", "url": "https://huggingface.co/datasets/sbintuitions/aio-extended-answers", "project_name": "aio-extended-answers", "downloads": 65, "source": "Hugging Face", "score": -0.05164351155306106, "first_commit": "2024-06-21 08:15:23", "latest_commit": "2024-07-29 08:26:02", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Natural Language Interfaces", "Question Answering", "Annotation and Dataset Development" ] }, { "description": "概要 このデータセットはnull-instruct-jaとDeepSeek-v2.5のq4を用いて合成されました。 ", "url": "https://huggingface.co/datasets/DataPilot/Generated-dataset-by-deepseek-v2.5", "project_name": "Generated-dataset-by-deepseek-v2.5", "downloads": 65, "source": "Hugging Face", "score": -0.05164351155306106, "first_commit": "2024-09-11 10:38:57", "latest_commit": "2024-09-11 12:20:29", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "Finance Sentiment JA (base) Finance Sentiment JA (base) is a model based on bert-base-japanese for analyzing sentiment of Japanese financial news.", "url": "https://huggingface.co/bardsai/finance-sentiment-ja-base", "project_name": "finance-sentiment-ja-base", "downloads": 64, "source": "Hugging Face", "score": -0.05165061067875557, "first_commit": "2023-09-18 13:00:29", "latest_commit": "2023-09-18 13:01:21", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Sentiment Analysis" ] }, { "description": "rinna/nekomata-14b-instruction-gguf Overview The model is the GGUF version of rinna/nekomata-14b-instruction.", "url": "https://huggingface.co/rinna/nekomata-14b-instruction-gguf", "project_name": "nekomata-14b-instruction-gguf", "downloads": 64, "source": "Hugging Face", "score": -0.05165061067875557, "first_commit": "2023-12-19 08:12:06", "latest_commit": "2024-07-20 
08:34:05", "languages": [], "model_or_dataset": "model", "model_size": 14.2, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "sarashina2.2-3b-RP-v0.1 GGUF版はこちら/Click here for the GGUF version 概要 sbintuitions/sarashina2.2-3b-instruct-v0.1をベースにロールプレイ用にファインチューニングしたモデルです。 ", "url": "https://huggingface.co/Aratako/sarashina2.2-3b-RP-v0.1", "project_name": "sarashina2.2-3b-RP-v0.1", "downloads": 64, "source": "Hugging Face", "score": -0.05165061067875557, "first_commit": "2025-04-01 16:34:25", "latest_commit": "2025-04-06 06:38:54", "languages": [], "model_or_dataset": "model", "model_size": 3.36, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "About weighted/imatrix quants of https://huggingface.co/owner203/japanese-llama-3-8b-instruct-v2 static quants are available at https://huggingface.co/mradermacher/japanese-llama-3-8b-instruct-v2-GGUF Usage", "url": "https://huggingface.co/mradermacher/japanese-llama-3-8b-instruct-v2-i1-GGUF", "project_name": "japanese-llama-3-8b-instruct-v2-i1-GGUF", "downloads": 64, "source": "Hugging Face", "score": -0.05165061067875557, "first_commit": "2025-01-13 03:13:28", "latest_commit": "2025-01-13 04:10:38", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [] }, { "description": "Gendec: Gender Dection from Japanese Names with Machine Learning", "url": "https://huggingface.co/datasets/tarudesu/gendec-dataset", "project_name": "gendec-dataset", "downloads": 64, "source": "Hugging Face", "score": -0.05165061067875557, "first_commit": "2023-11-14 01:59:12", "latest_commit": "2024-03-23 16:58:27", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "Magpie-Tanuki-8B-97k Magpieの手法をweblab-GENIAC/Tanuki-8B-dpo-v1.0に対して適用し作成した、97269件の日本語対話データセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Magpie-Tanuki-8B-97k", "project_name": "Magpie-Tanuki-8B-97k", "downloads": 64, "source": "Hugging Face", "score": -0.05165061067875557, "first_commit": "2024-10-03 14:02:26", "latest_commit": "2024-10-03 14:07:49", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Local-Novel-LLM-project様の Ninja-V2-7B をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Ninja-V2-7B-GGUF", "project_name": "Ninja-V2-7B-GGUF", "downloads": 63, "source": "Hugging Face", "score": -0.05165770980445008, "first_commit": "2024-06-15 16:23:41", "latest_commit": "2024-06-15 21:25:59", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Filtered and modified version of Japanese/Chinese language pair data from WikiMatrix v1.", "url": "https://huggingface.co/datasets/larryvrh/WikiMatrix-v1-Ja_Zh-filtered", "project_name": "WikiMatrix-v1-Ja_Zh-filtered", "downloads": 63, "source": "Hugging Face", "score": -0.05165770980445008, "first_commit": "2023-04-08 03:07:25", "latest_commit": "2023-04-08 05:16:37", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": 
null, "multi_labels": [ "Semantic Similarity", "Semantic Text Processing" ] }, { "description": "GitHub リポジトリ cl-tohoku/quiz-datasets で公開されているデータセットを利用しています。 ", "url": "https://huggingface.co/datasets/llm-book/aio-passages", "project_name": "aio-passages", "downloads": 63, "source": "Hugging Face", "score": -0.05165770980445008, "first_commit": "2023-06-06 02:03:34", "latest_commit": "2023-06-24 05:55:37", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Retrieval", "Annotation and Dataset Development" ] }, { "description": "OpenMathInstruct-1 を日本語に自動翻訳した商用利用可能な180万件の指示チューニングデータセットになります。 ", "url": "https://huggingface.co/datasets/kunishou/OpenMathInstruct-1-1.8m-ja", "project_name": "OpenMathInstruct-1-1.8m-ja", "downloads": 63, "source": "Hugging Face", "score": -0.05165770980445008, "first_commit": "2024-02-23 16:31:34", "latest_commit": "2024-02-24 18:29:28", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Numerical Reasoning", "Reasoning" ] }, { "description": "Synthetic-Japanese-Roleplay-gpt-4o-mini-19.8k 概要 gpt-4o-miniを用いて作成した、約19800件の日本語ロールプレイの対話を収録した合成データセットです。", "url": "https://huggingface.co/datasets/Aratako/Synthetic-Japanese-Roleplay-gpt-4o-mini-19.8k", "project_name": "Synthetic-Japanese-Roleplay-gpt-4o-mini-19.8k", "downloads": 63, "source": "Hugging Face", "score": -0.05165770980445008, "first_commit": "2024-08-16 14:35:16", "latest_commit": "2024-08-16 16:45:26", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "luke-japanese-base-lite-xlm-roberta studio-ousia/luke-japanese-base-liteの重みの名前をXLMRoberta形式に置き換え、XLMRobertaモデルとして扱えるようにした物です。 ", "url": "https://huggingface.co/hotchpotch/luke-japanese-base-lite-xlm-roberta", "project_name": "luke-japanese-base-lite-xlm-roberta", "downloads": 62, "source": "Hugging Face", "score": -0.05166480893014459, "first_commit": "2024-09-09 18:18:38", "latest_commit": "2024-09-09 18:33:44", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "XLMRobertaForMaskedLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "shisa-v1-qwen2-7b-gguf (English explanation is below.", "url": "https://huggingface.co/keitokei1994/shisa-v1-qwen2-7b-GGUF", "project_name": "shisa-v1-qwen2-7b-GGUF", "downloads": 62, "source": "Hugging Face", "score": -0.05166480893014459, "first_commit": "2024-06-09 08:58:45", "latest_commit": "2024-07-04 07:44:00", "languages": [], "model_or_dataset": "model", "model_size": 7.62, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "DataPilot/Zero_SFT_Ja_v2_b3t4 このデータセットは、日本語で記述された高品質な合成プロンプトとそのAI出力を収録しています。", "url": "https://huggingface.co/datasets/DataPilot/Zero_SFT_Ja_v2", "project_name": "Zero_SFT_Ja_v2", "downloads": 62, "source": "Hugging Face", "score": -0.05166480893014459, "first_commit": "2025-04-11 19:21:55", "latest_commit": "2025-04-16 21:11:58", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Fugaku-LLM利用規約 
These Terms of Use (hereinafter referred to as the Terms) set forth the conditions for use of the large language model (hereinafter referred to as Fugaku-LLM) released by Fujitsu Limited, RIKEN, Tokyo Institute of Technology, Tohoku University, CyberAgent, Inc., Tokai National Higher Education and Research System, and Kotoba Technologies Japan, Inc. (hereinafter collectively referred to as the Developers) as a deliverable of the development of distributed parallel training methods for large language models under the policy-response allocation of the supercomputer Fugaku.", "url": "https://huggingface.co/Fugaku-LLM/Fugaku-LLM-13B-instruct-gguf", "project_name": "Fugaku-LLM-13B-instruct-gguf", "downloads": 61, "source": "Hugging Face", "score": -0.0516719080558391, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 13.4, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Dataset Details Dataset Sources Repository: Helsinki-NLP/Tatoeba-Challenge Detail: Japanese - Korean jpn-kor Uses The dataset can be used to train a translation model that translates Japanese sentences into Korean.", "url": "https://huggingface.co/datasets/sappho192/Tatoeba-Challenge-jpn-kor", "project_name": "Tatoeba-Challenge-jpn-kor", "downloads": 61, "source": "Hugging Face", "score": -0.0516719080558391, "first_commit": "2024-01-30 01:01:30", "latest_commit": "2024-01-30 16:51:21", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Information Extraction & Text Mining", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "iterative-dpo-data-for-SimPO-iter2 Overview: A Japanese preference dataset created from the synthetic instruction data Aratako/Magpie-Tanuki-Instruction-Selected-Evolved-26.5k through the following procedure. ", "url": "https://huggingface.co/datasets/Aratako/iterative-dpo-data-for-SimPO-iter2", "project_name": "iterative-dpo-data-for-SimPO-iter2", "downloads": 61, "source": "Hugging Face", "score": -0.0516719080558391, "first_commit": "2024-12-04 03:24:08", "latest_commit": "2024-12-15 06:33:44", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Generation" ] }, { "description": "LLMChat-Judge-Results Results of pairwise evaluation, using a variety of models, of the responses of the two models in team-hatakeyama-phase2/LLMChat. ", "url": "https://huggingface.co/datasets/Aratako/LLMChat-Judge-Results", "project_name": "LLMChat-Judge-Results", "downloads": 61, "source": "Hugging Face", "score": -0.0516719080558391, "first_commit": "2024-10-26 06:17:00", "latest_commit": "2024-10-26 07:22:39", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Here we share a Japanese dataset synthesized using the OpenAI GPT-4 model with Self-Instruct, utilizing some excess Azure credits.", "url": "https://huggingface.co/datasets/CausalLM/GPT-4-Self-Instruct-Japanese", "project_name": "GPT-4-Self-Instruct-Japanese", "downloads": 61, "source": "Hugging Face", "score": -0.0516719080558391, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Similarity", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "DataPilot/Zero_SFT_Ja_v2_b3t4 This dataset contains high-quality synthetic prompts written in Japanese together with their AI-generated outputs.", "url": "https://huggingface.co/datasets/DataPilot/Zero_SFT_Ja_v2_b3t4", "project_name": "Zero_SFT_Ja_v2_b3t4", "downloads": 60, "source": "Hugging Face", 
"score": -0.051679007181533614, "first_commit": "2025-04-11 03:52:02", "latest_commit": "2025-04-11 03:57:58", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "About static quants of https://huggingface.co/cyberagent/Mistral-Nemo-Japanese-Instruct-2408 weighted/imatrix quants are available at https://huggingface.co/mradermacher/Mistral-Nemo-Japanese-Instruct-2408-i1-GGUF Usage If you are unsure how to use GGUF files, refer to one of TheBloke's READMEs for more details, including on how to concatenate multi-part files.", "url": "https://huggingface.co/mradermacher/Mistral-Nemo-Japanese-Instruct-2408-GGUF", "project_name": "Mistral-Nemo-Japanese-Instruct-2408-GGUF", "downloads": 59, "source": "Hugging Face", "score": -0.05168610630722813, "first_commit": "2025-02-24 16:22:58", "latest_commit": "2025-02-27 21:47:12", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": null, "multi_labels": [ "Dialogue Response Generation", "Syntactic Text Processing" ] }, { "description": "「LLM-jp-3 172B beta2」利用規約 この利用規約(以下「本規約」といいます)は、大学共同利用機関法人 情報・システム研究機構 国立情報学研究所(以下「提供者」といいます)による開発の成果物として公開する大規模言語モデル「LLM-jp-3 172B beta2」(以下「本プログラム」といいます)の利用に関する条件を定めるものです。", "url": "https://huggingface.co/llm-jp/llm-jp-3-172b-beta2", "project_name": "llm-jp-3-172b-beta2", "downloads": 59, "source": "Hugging Face", "score": -0.05168610630722813, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 172.0, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "QuantFactory/Llama-3.1-Swallow-8B-v0.1-GGUF This is quantized version of tokyotech-llm/Llama-3.1-Swallow-8B-v0.1 created using llama.cpp Original Model Card Llama 3.1 Swallow - Built with Llama Llama 3.1 Swallow is a series of large language models (8B, 70B) that were built by continual pre-training on the Meta Llama 3.1 models.", "url": "https://huggingface.co/QuantFactory/Llama-3.1-Swallow-8B-v0.1-GGUF", "project_name": "Llama-3.1-Swallow-8B-v0.1-GGUF", "downloads": 59, "source": "Hugging Face", "score": -0.05168610630722813, "first_commit": "2024-10-31 06:41:33", "latest_commit": "2024-10-31 07:25:56", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "日本語指示・推論・回答データセット 概要 このリポジトリは、SkunkworksAI/reasoning-0.01 に含まれるインストラクションデータを基に、Qwen/Qwen2.5-32B-Instruct モデルを用いて作成した日本語版の指示・推論・回答データセットです。", "url": "https://huggingface.co/datasets/DeL-TaiseiOzaki/reasoning-finetuning-ja", "project_name": "reasoning-finetuning-ja", "downloads": 59, "source": "Hugging Face", "score": -0.05168610630722813, "first_commit": "2024-10-12 15:26:10", "latest_commit": "2024-10-12 15:45:43", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "llm-jp-3-13b-instruct2 LLM-jp-3 is the series of large language models developed by the Research and Development Center for Large Language Models at the National Institute of Informatics.", "url": "https://huggingface.co/llm-jp/llm-jp-3-13b-instruct2", "project_name": "llm-jp-3-13b-instruct2", "downloads": 58, "source": "Hugging Face", "score": -0.051693205432922634, 
"first_commit": "2025-01-27 07:27:28", "latest_commit": "2025-02-04 04:58:56", "languages": [], "model_or_dataset": "model", "model_size": 13.7, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Model Details Model Description This repository provides Asagi-8B, a large-scale Japanese Vision & Language Model (VLM).", "url": "https://huggingface.co/MIL-UT/Asagi-8B", "project_name": "Asagi-8B", "downloads": 58, "source": "Hugging Face", "score": -0.051693205432922634, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 7.76, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "bert-base-japanese-v3-jcommonsenseqa 「大規模言語モデル入門」の第5章で紹介している(多肢選択式質問応答)のモデルです。 ", "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-jcommonsenseqa", "project_name": "bert-base-japanese-v3-jcommonsenseqa", "downloads": 58, "source": "Hugging Face", "score": -0.051693205432922634, "first_commit": "2023-06-20 07:01:53", "latest_commit": "2023-07-24 06:49:16", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMultipleChoice", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "alabnii/jmedroberta-base-sentencepiece Model description This is a Japanese RoBERTa base model pre-trained on academic articles in medical sciences collected by Japan Science and Technology Agency (JST).", "url": "https://huggingface.co/alabnii/jmedroberta-base-sentencepiece", "project_name": "jmedroberta-base-sentencepiece", "downloads": 58, "source": "Hugging Face", "score": -0.051693205432922634, "first_commit": "2022-12-22 17:20:33", "latest_commit": "2023-03-21 23:57:37", "languages": [], "model_or_dataset": "model", "model_size": 0.109, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Ruri: Japanese General Text Embeddings Usage First install the Sentence Transformers library: pip install -U sentence-transformers Then you can load this model and run inference.", "url": "https://huggingface.co/cl-nagoya/ruri-pt-base", "project_name": "ruri-pt-base", "downloads": 58, "source": "Hugging Face", "score": -0.051693205432922634, "first_commit": "2024-08-17 10:38:19", "latest_commit": "2024-09-13 01:38:07", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "QuantFactory/Llama3-ArrowSE-8B-v0.3-GGUF This is quantized version of DataPilot/Llama3-ArrowSE-8B-v0.3 created using llama.cpp Original Model Card 概要 elyza/Llama-3-ELYZA-JP-8Bを元にchat vectorを用いて改良しAItuberに特化させました。 ", "url": "https://huggingface.co/QuantFactory/Llama3-ArrowSE-8B-v0.3-GGUF", "project_name": "Llama3-ArrowSE-8B-v0.3-GGUF", "downloads": 58, "source": "Hugging Face", "score": -0.051693205432922634, "first_commit": "2024-07-28 15:51:47", "latest_commit": "2024-07-28 16:29:51", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Update: 2023/12/25oasst2-135k-jaをチャット形式に変換したoasst2-chat-68k-jaを公開しました。 ", "url": 
"https://huggingface.co/datasets/kunishou/oasst2-135k-ja", "project_name": "oasst2-135k-ja", "downloads": 58, "source": "Hugging Face", "score": -0.051693205432922634, "first_commit": "2023-12-24 22:04:54", "latest_commit": "2023-12-25 13:23:55", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "RetrievaBERT Model The RetrievaBERT is the pre-trained Transformer Encoder using Megatron-LM.", "url": "https://huggingface.co/retrieva-jp/bert-1.3b", "project_name": "bert-1.3b", "downloads": 57, "source": "Hugging Face", "score": -0.05170030455861715, "first_commit": "2024-06-25 06:18:24", "latest_commit": "2024-07-09 05:36:08", "languages": [], "model_or_dataset": "model", "model_size": 1.3, "model_architectures": "RetrievaBertForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "reazonspeech-espnet-next ReazonSpeech is a project to maintain freely-available Japanese audio datasets and ML models.", "url": "https://huggingface.co/reazon-research/reazonspeech-espnet-next", "project_name": "reazonspeech-espnet-next", "downloads": 57, "source": "Hugging Face", "score": -0.05170030455861715, "first_commit": "2023-03-29 07:20:03", "latest_commit": "2023-03-29 17:28:01", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP" ] }, { "description": "Heron GIT Japanese StableLM", "url": "https://huggingface.co/turing-motors/heron-chat-git-ja-stablelm-base-7b-v0", "project_name": "heron-chat-git-ja-stablelm-base-7b-v0", "downloads": 57, "source": "Hugging Face", "score": -0.05170030455861715, "first_commit": "2023-09-06 09:19:59", "latest_commit": "2023-09-11 16:55:23", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GitJapaneseStableLMAlphaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Qwen2.5 Bakeneko 32B (rinna/qwen2.5-bakeneko-32b)", "url": "https://huggingface.co/rinna/qwen2.5-bakeneko-32b", "project_name": "qwen2.5-bakeneko-32b", "downloads": 57, "source": "Hugging Face", "score": -0.05170030455861715, "first_commit": "2025-02-10 16:49:49", "latest_commit": "2025-02-13 01:17:58", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "ELYZA-japanese-Llama-2-13b-fast-instruct-gguf ELYZA-japanese-Llama-2-13b-fast-instructの GGUF 変換モデルです。", "url": "https://huggingface.co/schroneko/ELYZA-japanese-Llama-2-13b-fast-instruct-gguf", "project_name": "ELYZA-japanese-Llama-2-13b-fast-instruct-gguf", "downloads": 57, "source": "Hugging Face", "score": -0.05170030455861715, "first_commit": "2024-01-23 14:14:39", "latest_commit": "2024-01-25 06:30:57", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "Tanuki-8B-dpo-v1.0-4k-GPTQ-4bit 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8B-dpo-v1.0-4kのGPTQ 4bit量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8B-dpo-v1.0-4k-GPTQ-4bit", "project_name": "Tanuki-8B-dpo-v1.0-4k-GPTQ-4bit", "downloads": 57, "source": "Hugging Face", "score": -0.05170030455861715, "first_commit": 
"2024-08-27 16:47:29", "latest_commit": "2024-09-03 09:28:03", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "magpie-japanese-math-instruction-17k-qwen2.5-bakeneko-32b-instruct rinna/qwen2.5-bakeneko-32b-instructを用いたMagpieで生成した合成Instructionデータセットです。 ", "url": "https://huggingface.co/datasets/Kendamarron/magpie-japanese-math-instruction-17k-qwen2.5-bakeneko-32b-instruct", "project_name": "magpie-japanese-math-instruction-17k-qwen2.5-bakeneko-32b-instruct", "downloads": 57, "source": "Hugging Face", "score": -0.05170030455861715, "first_commit": "2025-02-21 16:08:55", "latest_commit": "2025-03-20 15:36:04", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Numerical Reasoning", "Reasoning", "Language Models", "Low-Resource NLP" ] }, { "description": "Japanese stopwords for nagisa", "url": "https://huggingface.co/datasets/taishi-i/nagisa_stopwords", "project_name": "nagisa_stopwords", "downloads": 57, "source": "Hugging Face", "score": -0.05170030455861715, "first_commit": "2023-08-06 17:10:10", "latest_commit": "2023-08-07 02:58:31", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Morphology", "Annotation and Dataset Development" ] }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-small-medium", "project_name": "t5-small-medium", "downloads": 56, "source": "Hugging Face", "score": -0.05170740368431166, "first_commit": "2023-04-26 08:26:19", "latest_commit": "2023-05-10 10:01:16", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "The dataset contains (almost) the entire OpenSubtittles database for Japanese: Over 7000 tv shows and/or movies.", "url": "https://huggingface.co/datasets/Nan-Do/OpenSubtitlesJapanese", "project_name": "OpenSubtitlesJapanese", "downloads": 56, "source": "Hugging Face", "score": -0.05170740368431166, "first_commit": "2023-04-18 07:43:48", "latest_commit": "2023-04-19 09:34:45", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining" ] }, { "description": "Synthetic-Japanese-Roleplay-gpt-4o-mini-39.6k 20240907 データ増量(約19800件→約39600件) 概要 gpt-4o-miniを用いて作成した、約39600件の日本語ロールプレイの対話を収録した合成データセットです。", "url": "https://huggingface.co/datasets/Aratako/Synthetic-Japanese-Roleplay-gpt-4o-mini-39.6k", "project_name": "Synthetic-Japanese-Roleplay-gpt-4o-mini-39.6k", "downloads": 56, "source": "Hugging Face", "score": -0.05170740368431166, "first_commit": "2024-08-16 14:35:16", "latest_commit": "2024-09-07 12:33:18", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Response Generation", "Dialogue Systems & Conversational Agents", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "ScreenTalk_JA ScreenTalk_JA is a paired dataset of Japanese speech and Chinese translated text released by DataLabX.", "url": 
"https://huggingface.co/datasets/DataLabX/ScreenTalk_JA", "project_name": "ScreenTalk_JA", "downloads": 56, "source": "Hugging Face", "score": -0.05170740368431166, "first_commit": "2025-04-18 23:05:35", "latest_commit": "2025-04-19 14:57:00", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "luke-large-defamation-detection-japanese 日本語誹謗中傷検出器", "url": "https://huggingface.co/kubota/luke-large-defamation-detection-japanese", "project_name": "luke-large-defamation-detection-japanese", "downloads": 55, "source": "Hugging Face", "score": -0.05171450281000617, "first_commit": "2023-01-23 06:25:08", "latest_commit": "2023-02-07 15:49:33", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeForSequenceClassification", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/OrionStarAI/Orion-14B-LongChat", "project_name": "Orion-14B-LongChat", "downloads": 55, "source": "Hugging Face", "score": -0.05171450281000617, "first_commit": "2024-01-19 07:15:36", "latest_commit": "2024-03-26 10:10:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "OrionForCausalLM", "multi_labels": [ "Multilinguality", "Language Models" ] }, { "description": "Japanese-TextGen-Kage-v0.1-2x7B Kage is \"影\" in Japanese or \"Shadow\" in English.", "url": "https://huggingface.co/dddump/Japanese-TextGen-Kage-v0.1-2x7B-gguf", "project_name": "Japanese-TextGen-Kage-v0.1-2x7B-gguf", "downloads": 55, "source": "Hugging Face", "score": -0.05171450281000617, "first_commit": "2024-05-04 07:03:38", "latest_commit": "2024-05-19 08:54:19", "languages": [], "model_or_dataset": "model", "model_size": 12.9, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ChouBun Dataset Description ChouBun is a benchmark for assessing LLMs' performance in long-context tasks in the Japanese language.", "url": "https://huggingface.co/datasets/SakanaAI/ChouBun", "project_name": "ChouBun", "downloads": 55, "source": "Hugging Face", "score": -0.05171450281000617, "first_commit": "2024-09-02 04:15:52", "latest_commit": "2024-10-18 02:45:43", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Natural Language Interfaces", "Question Answering", "Summarization", "Annotation and Dataset Development" ] }, { "description": "Self-Instruct-Qwen2.5-72B-Instruct-60k 概要 以下の手順で作成した約6万件の日本語の合成instructionデータセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Self-Instruct-Qwen2.5-72B-Instruct-60k", "project_name": "Self-Instruct-Qwen2.5-72B-Instruct-60k", "downloads": 55, "source": "Hugging Face", "score": -0.05171450281000617, "first_commit": "2024-12-13 02:56:47", "latest_commit": "2024-12-16 17:21:59", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "JDocQA_SingleImage Dataset Dataset Summary 
JDocQA_SingleImage is a dataset built from the test subset of shunk031/JDocQA, with the PDF files converted to 200 dpi images and with questions whose images cannot be retrieved, or that require multiple images, excluded.", "url": "https://huggingface.co/datasets/umiyuki/JDocQA_SingleImage", "project_name": "JDocQA_SingleImage", "downloads": 55, "source": "Hugging Face", "score": -0.05171450281000617, "first_commit": "2025-03-24 09:10:09", "latest_commit": "2025-03-25 04:37:35", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Visual Data in NLP", "Question Answering", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Converted from: drewschaub/whisper-large-v3-japanese-4k-steps Whisper large-v3 model for CTranslate2 This repository contains the conversion of drewschaub/whisper-large-v3-japanese-4k-steps to the CTranslate2 model format.", "url": "https://huggingface.co/JhonVanced/whisper-large-v3-japanese-4k-steps-ct2", "project_name": "whisper-large-v3-japanese-4k-steps-ct2", "downloads": 54, "source": "Hugging Face", "score": -0.05172160193570068, "first_commit": "2024-02-20 13:41:17", "latest_commit": "2024-02-22 01:11:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "Ninja-v1-RP-expressive Click here for the GGUF version. Overview: This is a merge of pre-trained language models created using mergekit. ", "url": "https://huggingface.co/Aratako/Ninja-v1-RP-expressive", "project_name": "Ninja-v1-RP-expressive", "downloads": 54, "source": "Hugging Face", "score": -0.05172160193570068, "first_commit": "2024-05-21 12:11:38", "latest_commit": "2024-05-24 15:11:43", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Model A reasoning model created by fine-tuning llm-jp/llm-jp-3-3.7b-instruct on CoT data. ", "url": "https://huggingface.co/Kendamarron/llm-jp-3-3.7b-o1-v0.1", "project_name": "llm-jp-3-3.7b-o1-v0.1", "downloads": 54, "source": "Hugging Face", "score": -0.05172160193570068, "first_commit": "2024-12-07 13:34:12", "latest_commit": "2024-12-10 14:56:58", "languages": [], "model_or_dataset": "model", "model_size": 3.78, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Dataset Description This is the Japanese-translated version of piqa.", "url": "https://huggingface.co/datasets/izumi-lab/piqa-ja-mbartm2m", "project_name": "piqa-ja-mbartm2m", "downloads": 54, "source": "Hugging Face", "score": -0.05172160193570068, "first_commit": "2023-05-18 22:11:08", "latest_commit": "2023-05-19 04:20:04", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "Introduction This is an LLM-filtered set of the first 1M rows from NTT's JParaCrawl v3 large English-Japanese parallel corpus.", "url": "https://huggingface.co/datasets/Verah/JParaCrawl-Filtered-English-Japanese-Parallel-Corpus", "project_name": "JParaCrawl-Filtered-English-Japanese-Parallel-Corpus", "downloads": 54, "source": "Hugging Face", "score": -0.05172160193570068, "first_commit": "2024-03-01 06:17:09", 
"latest_commit": "2024-03-07 21:20:21", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "読み込み方 from datasets import load_dataset dataset = load_dataset(\"YANS-official/ogiri-debug\", split=\"test\") 概要 大喜利生成の動作確認用データセットです。", "url": "https://huggingface.co/datasets/YANS-official/ogiri-debug", "project_name": "ogiri-debug", "downloads": 54, "source": "Hugging Face", "score": -0.05172160193570068, "first_commit": "2024-08-30 04:18:35", "latest_commit": "2024-08-30 14:52:03", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Text Generation", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Anime Songs Lyrics Dataset ― アニメソングの歌詞データセット Welcome to the Anime Songs Lyrics Dataset Overview This dataset compiles a diverse collection of lyrics from various anime songs, providing a rich resource for enthusiasts and researchers alike.", "url": "https://huggingface.co/datasets/mohamed-khalil/AnimeSongsLyrics", "project_name": "AnimeSongsLyrics", "downloads": 54, "source": "Hugging Face", "score": -0.05172160193570068, "first_commit": "2024-03-05 17:17:25", "latest_commit": "2024-03-05 18:02:39", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "Sakura_dataset 商用利用可能な超小規模高品質日本語データセット。 ", "url": "https://huggingface.co/datasets/saldra/sakura_japanese_dataset", "project_name": "sakura_japanese_dataset", "downloads": 54, "source": "Hugging Face", "score": -0.05172160193570068, "first_commit": "2023-06-07 05:44:23", "latest_commit": "2023-06-08 11:31:06", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Reasoning", "Commonsense Reasoning" ] }, { "description": "Details: https://spacy.io/models/ja#ja_core_news_lg Japanese pipeline optimized for CPU.", "url": "https://huggingface.co/spacy/ja_core_news_lg", "project_name": "ja_core_news_lg", "downloads": 53, "source": "Hugging Face", "score": -0.051728701061395196, "first_commit": "2021-07-07 12:08:11", "latest_commit": "2023-10-10 06:46:01", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Representation Learning", "Syntactic Text Processing", "Named Entity Recognition", "Semantic Text Processing" ] }, { "description": "SambaLingo-Japanese-Base SambaLingo-Japanese-Base is a pretrained Bi-lingual Japanese and English model that adapts Llama-2-7b to Japanese by training on 42 billion tokens from the Japanese split of the Cultura-X dataset.", "url": "https://huggingface.co/sambanovasystems/SambaLingo-Japanese-Base", "project_name": "SambaLingo-Japanese-Base", "downloads": 53, "source": "Hugging Face", "score": -0.051728701061395196, "first_commit": "2024-02-15 22:49:08", "latest_commit": "2024-04-16 22:33:28", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Multilinguality", "Language Models" ] }, { "description": "Japanese Wikipedia Human Retrieval dataset This is a Japanese question answereing dataset with retrieval on Wikipedia articles by trained 
human workers.", "url": "https://huggingface.co/datasets/baobab-trees/wikipedia-human-retrieval-ja", "project_name": "wikipedia-human-retrieval-ja", "downloads": 53, "source": "Hugging Face", "score": -0.051728701061395196, "first_commit": "2024-01-15 13:52:30", "latest_commit": "2024-03-19 04:25:44", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Natural Language Interfaces", "Information Retrieval", "Question Answering", "Annotation and Dataset Development" ] }, { "description": "By agreeing to the following conditions, you may download the published models, datasets, and other materials (hereinafter referred to as the Content). ", "url": "https://huggingface.co/datasets/weblab-GENIAC/aya-ja-evol-instruct-calm3-dpo-masked", "project_name": "aya-ja-evol-instruct-calm3-dpo-masked", "downloads": 53, "source": "Hugging Face", "score": -0.051728701061395196, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "JDocQA_SingleImage_200 Dataset Dataset Summary JDocQA_SingleImage_200 is a dataset built from the test subset of shunk031/JDocQA, with the PDF files converted to 200 dpi images and with questions whose images cannot be retrieved, or that require multiple images, excluded.", "url": "https://huggingface.co/datasets/umiyuki/JDocQA_SingleImage_200", "project_name": "JDocQA_SingleImage_200", "downloads": 53, "source": "Hugging Face", "score": -0.051728701061395196, "first_commit": "2025-03-24 20:14:13", "latest_commit": "2025-03-25 04:40:06", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Annotation and Dataset Development" ] }, { "description": "roberta-base-japanese-aozora-char Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-base-japanese-aozora-char", "project_name": "roberta-base-japanese-aozora-char", "downloads": 52, "source": "Hugging Face", "score": -0.0517358001870897, "first_commit": "2021-12-27 09:14:03", "latest_commit": "2022-06-21 14:50:52", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "bert-base-japanese-v3-bpr-question-aio The question encoder of BPR, the document retrieval model introduced in Chapter 9 of 「大規模言語モデル入門」 (Introduction to Large Language Models). ", "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-bpr-question-aio", "project_name": "bert-base-japanese-v3-bpr-question-aio", "downloads": 52, "source": "Hugging Face", "score": -0.0517358001870897, "first_commit": "2023-06-06 08:21:13", "latest_commit": "2023-07-24 07:12:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertModel", "multi_labels": [ "Natural Language Interfaces", "Language Models", "Semantic Text Processing" ] }, { "description": "[Llama-3.1-70B-EZO-1.1-it] Model Card Model Information This model is based on Meta AI's Llama 3.1 and fine-tuned to improve performance on Japanese tasks.", "url": "https://huggingface.co/AXCXEPT/Llama-3.1-70B-EZO-1.1-it", "project_name": "Llama-3.1-70B-EZO-1.1-it", "downloads": 52, "source": "Hugging Face", "score": -0.0517358001870897, "first_commit": "2024-07-29 01:35:35", "latest_commit": "2024-08-23 10:52:31", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": 
"LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "NuminaMath Enhanced CoT Dataset (Japanese 50k Subset)", "url": "https://huggingface.co/datasets/Inoichan/NuminaMath-Enhanced-CoT-JA-50K", "project_name": "NuminaMath-Enhanced-CoT-JA-50K", "downloads": 52, "source": "Hugging Face", "score": -0.0517358001870897, "first_commit": "2025-01-05 15:26:36", "latest_commit": "2025-01-13 14:53:42", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Reasoning" ] }, { "description": "以下のデータ源からランダムに抽出した日本語のテキストをPhi-3で再生成し、更に自動英訳したコーパスです。 Wikibooks Wikipedia コード こちら 一部の計算には東京工業大学のスーパーコンピュータTSUBAME4.0を利用しました。 データ parquetファイルが数十GB程度あります datasetsライブラリからでは、はじめの数GB程度しか読み込めない可能性があります。git lfsなどでダウンロードする必要がありそうです。", "url": "https://huggingface.co/datasets/kanhatakeyama/SyntheticTextWikiTranslate", "project_name": "SyntheticTextWikiTranslate", "downloads": 52, "source": "Hugging Face", "score": -0.0517358001870897, "first_commit": "2024-06-13 06:50:51", "latest_commit": "2024-07-16 07:31:35", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "AttaQ-JA Dataset Card AttaQ red teaming dataset was designed to evaluate Large Language Models (LLMs) by assessing their tendency to generate harmful or undesirable responses, which consists of 1402 carefully crafted adversarial questions.", "url": "https://huggingface.co/datasets/ibm/AttaQ-JA", "project_name": "AttaQ-JA", "downloads": 52, "source": "Hugging Face", "score": -0.0517358001870897, "first_commit": "2024-09-27 00:04:13", "latest_commit": "2024-11-05 05:17:50", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "(English part follows Japanese one.", "url": "https://huggingface.co/tohoku-nlp/stable-diffusion-xl-jp-refiner-1.0", "project_name": "stable-diffusion-xl-jp-refiner-1.0", "downloads": 51, "source": "Hugging Face", "score": -0.05174289931278422, "first_commit": "2023-11-06 05:01:57", "latest_commit": "2023-11-06 05:12:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "◆REV-Mix \"レボリューション\"なモデルです。 ", "url": "https://huggingface.co/Hemlok/REV-Mix", "project_name": "REV-Mix", "downloads": 51, "source": "Hugging Face", "score": -0.05174289931278422, "first_commit": "2023-08-06 17:04:53", "latest_commit": "2023-08-26 16:19:02", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "OpenOrcaデータセットの日本語翻訳版です https://huggingface.co/datasets/Open-Orca/OpenOrca 現在翻訳作業が続行中で、OpenOrca全体の1/5程度の翻訳が終わった状態でひとまず公開します。", "url": "https://huggingface.co/datasets/shi3z/OpenOrcaJapanese", "project_name": "OpenOrcaJapanese", "downloads": 51, "source": "Hugging Face", "score": -0.05174289931278422, "first_commit": "2023-10-27 06:15:27", "latest_commit": "2023-10-28 02:50:27", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language 
Interfaces" ] }, { "description": "VOICEVOXを使った人工音声ボイスデータセット 使用したテキストコーパス ITAコーパス つくよみちゃんコーパス ROHANコーパス データセット量情報 フォルダ内の.", "url": "https://huggingface.co/datasets/ayousanz/voicevox-voice-corpus", "project_name": "voicevox-voice-corpus", "downloads": 51, "source": "Hugging Face", "score": -0.05174289931278422, "first_commit": "2024-05-19 03:01:27", "latest_commit": "2024-05-26 04:13:36", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality" ] }, { "description": "Retrieval-Based Multi-Turn Chat SFT Synthetic Data A year ago, we released CausalLM/Refined-Anime-Text, a thematic subset of a dataset generated using the then state-of-the-art LLMs.", "url": "https://huggingface.co/datasets/CausalLM/Retrieval-SFT-Chat", "project_name": "Retrieval-SFT-Chat", "downloads": 51, "source": "Hugging Face", "score": -0.05174289931278422, "first_commit": "2025-02-11 14:41:46", "latest_commit": "2025-02-14 22:09:30", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Information Retrieval", "Dialogue Systems & Conversational Agents" ] }, { "description": "Synthetic-Japanese-Roleplay-SFW-DeepSeek-V3-0324-20k-formatted 概要 deepseek-ai/DeepSeek-V3-0324を用いて作成した日本語ロールプレイデータセットであるAratako/Synthetic-Japanese-Roleplay-SFW-DeepSeek-V3-0324-20kにsystem messageを追加して整形したデータセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Synthetic-Japanese-Roleplay-SFW-DeepSeek-V3-0324-20k-formatted", "project_name": "Synthetic-Japanese-Roleplay-SFW-DeepSeek-V3-0324-20k-formatted", "downloads": 51, "source": "Hugging Face", "score": -0.05174289931278422, "first_commit": "2025-04-22 16:12:17", "latest_commit": "2025-04-23 14:10:24", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multimodality" ] }, { "description": "fineweb-2-japanese-noise-spans このデータセットは、FineWeb2 の日本語データから、Web特有のノイズ箇所を判定したデータセットです。", "url": "https://huggingface.co/datasets/hotchpotch/fineweb-2-japanese-noise-spans", "project_name": "fineweb-2-japanese-noise-spans", "downloads": 51, "source": "Hugging Face", "score": -0.05174289931278422, "first_commit": "2025-02-17 00:13:09", "latest_commit": "2025-02-20 00:32:44", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "概要 このデータセットは思考モデルを製作する際のもととなる質問データを集めたものになります。 ", "url": "https://huggingface.co/datasets/Nurture-intelligence/thinking_dataset_v1", "project_name": "thinking_dataset_v1", "downloads": 51, "source": "Hugging Face", "score": -0.05174289931278422, "first_commit": "2024-12-22 08:13:59", "latest_commit": "2024-12-22 08:35:44", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "評価スコアの再現性確保と SB Intuitions 修正版の公開用クローン ソース: yahoojapan/JGLUE on GitHub datasets/jcommonsenseqa-v1.1 JCommonsenseQA JCommonsenseQA is a Japanese version of CommonsenseQA (Talmor+, 2019), which is a multiple-choice question answering dataset that requires commonsense reasoning ability.", "url": "https://huggingface.co/datasets/sbintuitions/JCommonsenseQA", "project_name": "JCommonsenseQA", "downloads": 51, "source": "Hugging Face", "score": -0.05174289931278422, 
"first_commit": "2024-07-30 05:21:01", "latest_commit": "2024-12-27 08:05:47", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Reasoning", "Commonsense Reasoning", "Semantic Text Processing" ] }, { "description": "msmarco-ja-hard-negatives hpprc/msmarco-ja で公開されているMS MARCOの日本語翻訳データに、以下の処理を加えたハードネガティブマイニングをしたものです。", "url": "https://huggingface.co/datasets/hotchpotch/msmarco-ja-hard-negatives", "project_name": "msmarco-ja-hard-negatives", "downloads": 51, "source": "Hugging Face", "score": -0.05174289931278422, "first_commit": "2024-11-18 02:33:24", "latest_commit": "2024-11-18 20:46:33", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "magpie-qwen2.5-32b-reasoning-100k-formatted DeL-TaiseiOzaki/magpie-qwen2.5-32b-reasoning-100kをOpenAI messages形式に変換したデータセットです。 ", "url": "https://huggingface.co/datasets/Aratako/magpie-qwen2.5-32b-reasoning-100k-formatted", "project_name": "magpie-qwen2.5-32b-reasoning-100k-formatted", "downloads": 51, "source": "Hugging Face", "score": -0.05174289931278422, "first_commit": "2024-11-20 12:42:37", "latest_commit": "2024-11-25 16:44:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Model description This model is a fine-tuned version of facebook/wav2vec2-xls-r-1b on my collection of Public Japanese Voice datasets for research Common Voice 7.0, JUST (Japanese speech corpus of Saruwatari-lab.", "url": "https://huggingface.co/vumichien/wav2vec2-xls-r-1b-japanese", "project_name": "wav2vec2-xls-r-1b-japanese", "downloads": 50, "source": "Hugging Face", "score": -0.051749998438478724, "first_commit": "2022-01-30 15:54:05", "latest_commit": "2023-02-08 00:22:33", "languages": [], "model_or_dataset": "model", "model_size": 0.963, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Stockmark-2-100B-Instruct-beta-gguf stockmarkさんが公開しているStockmark-2-100B-Instruct-betaのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Stockmark-2-100B-Instruct-beta-gguf", "project_name": "Stockmark-2-100B-Instruct-beta-gguf", "downloads": 50, "source": "Hugging Face", "score": -0.051749998438478724, "first_commit": "2025-03-06 17:09:42", "latest_commit": "2025-03-07 11:43:42", "languages": [], "model_or_dataset": "model", "model_size": 96.0, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "DataPilot様の ArrowPro-7B-RobinHood をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/ArrowPro-7B-RobinHood-GGUF", "project_name": "ArrowPro-7B-RobinHood-GGUF", "downloads": 50, "source": "Hugging Face", "score": -0.051749998438478724, "first_commit": "2024-05-10 12:03:26", "latest_commit": "2024-05-10 18:14:28", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "llm-jp-corpus-v3のkakenサブセットをHFフォーマットに変換し、各データに付与されたURLから元記事のタイトルを取得可能なものについては取得して付与したデータセットです。 ", "url": "https://huggingface.co/datasets/hpprc/llmjp-kaken", "project_name": "llmjp-kaken", "downloads": 50, "source": "Hugging 
Face", "score": -0.051749998438478724, "first_commit": "2024-09-18 02:50:21", "latest_commit": "2024-12-08 15:03:11", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ja-stackoverflow 日本語版 Stack Overflow の スタック・オーバーフロー のデータダンプ をもとにデータを加工し、質問文と回答文のペアになるように調整した QA データセット。 ", "url": "https://huggingface.co/datasets/p1atdev/ja-stackoverflow", "project_name": "ja-stackoverflow", "downloads": 50, "source": "Hugging Face", "score": -0.051749998438478724, "first_commit": "2023-12-16 02:41:12", "latest_commit": "2023-12-21 05:30:24", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Annotation and Dataset Development" ] }, { "description": "Synthetic-JP-EN-Coding-Dataset-Magpie-69k Magpieの手法を様々なモデルに対して適用し作成した、約69000件の日本語・英語のコーディング対話データセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Synthetic-JP-EN-Coding-Dataset-Magpie-69k", "project_name": "Synthetic-JP-EN-Coding-Dataset-Magpie-69k", "downloads": 50, "source": "Hugging Face", "score": -0.051749998438478724, "first_commit": "2024-07-11 10:19:45", "latest_commit": "2024-07-11 12:07:01", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Dataset origin: https://github.com/doc-analysis/XFUND XFUND:", "url": "https://huggingface.co/datasets/FrancophonIA/XFUND", "project_name": "XFUND", "downloads": 50, "source": "Hugging Face", "score": -0.051749998438478724, "first_commit": "2024-09-30 21:22:34", "latest_commit": "2024-10-11 18:27:27", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Annotation and Dataset Development" ] }, { "description": "sarashina2.1-1b-sft-gguf Aratakoさんが公開しているsarashina2.1-1b-sftのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/sarashina2.1-1b-sft-gguf", "project_name": "sarashina2.1-1b-sft-gguf", "downloads": 49, "source": "Hugging Face", "score": -0.05175709756417324, "first_commit": "2024-12-11 12:31:29", "latest_commit": "2024-12-11 13:21:21", "languages": [], "model_or_dataset": "model", "model_size": 1.41, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese-WizardLM2-ChatV-7B-GGUF GGUF conversion of \"Japanese-WizardLM2-ChatV-7B\" This model, Japanese-WizardLM2-ChatV-7B, is based on \"chatntq-ja-7b-v1.0 \", and was created by subtracting \"Mistral-7B-v0.1\" from \"WizardLM-2-7b\" ChatVector was added by a factor of 1.0.", "url": "https://huggingface.co/umiyuki/Japanese-WizardLM2-ChatV-7B-GGUF", "project_name": "Japanese-WizardLM2-ChatV-7B-GGUF", "downloads": 49, "source": "Hugging Face", "score": -0.05175709756417324, "first_commit": "2024-04-16 14:45:30", "latest_commit": "2024-04-17 01:41:16", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "Wikidata parallel descriptions en-ja Parallel corpus for machine translation generated from wikidata dump (2024-05-06).", "url": "https://huggingface.co/datasets/Mitsua/wikidata-parallel-descriptions-en-ja", "project_name": "wikidata-parallel-descriptions-en-ja", "downloads": 
49, "source": "Hugging Face", "score": -0.05175709756417324, "first_commit": "2024-05-13 12:02:43", "latest_commit": "2024-05-17 00:25:10", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "このデータセットは、Wikipediaデータセットの日本語データのみを抽出したparquetファイルであるrange3/wikipedia-ja-20230101より、「Category:投資」に含まれる記事に該当するレコードの一部を抽出した作業用サンプルです。", "url": "https://huggingface.co/datasets/onewanto/sample-dataset-wikipedia-financial-terms", "project_name": "sample-dataset-wikipedia-financial-terms", "downloads": 49, "source": "Hugging Face", "score": -0.05175709756417324, "first_commit": "2025-04-17 07:15:23", "latest_commit": "2025-04-17 07:37:31", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "リアル系モデルに特有の肖像権の問題について比較的クリアなモデルを作ることが可能なように、私が私自身から作り出した人工超彼女(ver 2.1系、ver 2.6系)のデータセット(約2800枚)を作成しました。 ", "url": "https://huggingface.co/datasets/ThePioneer/Artificial-super-girlfriend-for-fine-tuning", "project_name": "Artificial-super-girlfriend-for-fine-tuning", "downloads": 49, "source": "Hugging Face", "score": -0.05175709756417324, "first_commit": "2023-05-05 01:48:37", "latest_commit": "2023-05-05 04:57:44", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Annotation and Dataset Development" ] }, { "description": "Japanese Ultrachat 6.6k Japanese Ultrachat 6.6k is the Japanese-translated version of the subset of ultrachat_200k using machine translation.", "url": "https://huggingface.co/datasets/EQUES/japanese_ultrachat_6.6k", "project_name": "japanese_ultrachat_6.6k", "downloads": 49, "source": "Hugging Face", "score": -0.05175709756417324, "first_commit": "2025-02-18 23:50:59", "latest_commit": "2025-02-19 05:58:20", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation" ] }, { "description": "txt: Wrime-v1のテキスト部分の一部と、OpenAIに生成させた文章をベースに、tohoku-nlp/bert-base-japanese-whole-word-masking でトークナイズした文章を文脈が成り立つ形で合成し、新たな文章を生成したもの。 ", "url": "https://huggingface.co/datasets/sode-k/txt_suicidality", "project_name": "txt_suicidality", "downloads": 49, "source": "Hugging Face", "score": -0.05175709756417324, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Shisa V2 Shisa V2 is a family of bilingual Japanese and English (JA/EN)", "url": "https://huggingface.co/shisa-ai/shisa-v2-qwen2.5-7b", "project_name": "shisa-v2-qwen2.5-7b", "downloads": 48, "source": "Hugging Face", "score": -0.05176419668986775, "first_commit": "2025-04-12 13:24:25", "latest_commit": "2025-04-16 13:26:43", "languages": [], "model_or_dataset": "model", "model_size": 7.62, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Multilinguality", "Language Models" ] }, { "description": "🌟 Ojisan構文変換モデル (GRPO + Unsloth + LoRA) このプロジェクトは、文章を「おじさん構文」に変換する日本語モデルを作成・学習するためのコードです。", "url": "https://huggingface.co/takuyadayo/ozisan", 
"project_name": "ozisan", "downloads": 48, "source": "Hugging Face", "score": -0.05176419668986775, "first_commit": "2025-03-23 09:07:25", "latest_commit": "2025-03-30 04:16:18", "languages": [], "model_or_dataset": "model", "model_size": 14.7, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF Model creator: MaziyarPanahi Original model: MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1 Description MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF contains GGUF format model files for MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1.", "url": "https://huggingface.co/MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF", "project_name": "japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF", "downloads": 48, "source": "Hugging Face", "score": -0.05176419668986775, "first_commit": "2024-01-26 06:13:55", "latest_commit": "2024-01-26 06:36:22", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Heron BLIP Japanese StableLM", "url": "https://huggingface.co/turing-motors/heron-chat-blip-ja-stablelm-base-7b-v1-llava-620k", "project_name": "heron-chat-blip-ja-stablelm-base-7b-v1-llava-620k", "downloads": 48, "source": "Hugging Face", "score": -0.05176419668986775, "first_commit": "2024-02-27 13:48:02", "latest_commit": "2024-02-27 13:59:23", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "VideoBlipForConditionalGeneration", "multi_labels": [ "Visual Data in NLP", "Language Models", "Multimodality" ] }, { "description": "Ruri-Reranker: Japanese General Reranker Usage Direct Usage (Sentence Transformers)", "url": "https://huggingface.co/cl-nagoya/ruri-reranker-stage1-large", "project_name": "ruri-reranker-stage1-large", "downloads": 48, "source": "Hugging Face", "score": -0.05176419668986775, "first_commit": "2024-08-19 23:48:54", "latest_commit": "2024-09-04 08:54:05", "languages": [], "model_or_dataset": "model", "model_size": 0.337, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "CC-MAIN-2019-35へようこそ 本データセットはCommonCrawlerと呼ばれるものから日本語のみを抽出したものです。 ", "url": "https://huggingface.co/datasets/cc-clean/CC-MAIN-2019-35", "project_name": "CC-MAIN-2019-35", "downloads": 48, "source": "Hugging Face", "score": -0.05176419668986775, "first_commit": "2024-12-22 01:37:19", "latest_commit": "2024-12-22 08:07:11", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ichikara-instruction-003-sharegpt Dataset by DataPilot データセット概要 (Dataset Summary) このデータセットは、kinokokoro/ichikara-instruction-003 で公開されている日本語インストラクションデータを、広く利用されている ShareGPT形式 に変換したものです。", "url": "https://huggingface.co/datasets/DataPilot/ichikara-instruction-003-sharegpt", "project_name": "ichikara-instruction-003-sharegpt", "downloads": 48, "source": "Hugging Face", "score": -0.05176419668986775, "first_commit": "2025-04-04 01:28:16", "latest_commit": "2025-04-04 01:31:31", "languages": [], "model_or_dataset": 
"dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "自動生成のマルチターンデータセット オープンなデータソースから、Calm3-22bを使ってQ&Aを自動生成したものです。 一部の計算には東京工業大学のスーパーコンピュータTSUBAME4.0を利用しました。 データソース はじめの質問(q1)を、種々のデータソースから収集しました。その後のやりとりはすべて、Calmが生成しました。質問文については、元データのライセンスに準拠します。 oasst2-33k-ja apache 2.0 databricks-dolly-15k-ja cc-by-sa-3.0 minnade CC0 cyberagent/chatbot-arena-ja-calm2-7b-chat-experimental cc-by-4.0", "url": "https://huggingface.co/datasets/kanhatakeyama/AutoMultiTurnByCalm3-22B", "project_name": "AutoMultiTurnByCalm3-22B", "downloads": 48, "source": "Hugging Face", "score": -0.05176419668986775, "first_commit": "2024-07-17 09:53:20", "latest_commit": "2024-07-17 10:03:02", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Dialogue Systems & Conversational Agents" ] }, { "description": "以下のデータ源からランダムに抽出した日本語のテキストをもとに、Phi-3で作文したコーパスです。 OpenMathInstruct-1-1.8m-ja コード こちら 一部の計算には東京工業大学のスーパーコンピュータTSUBAME4.0を利用しました。", "url": "https://huggingface.co/datasets/kanhatakeyama/SyntheticTextOpenMathInstruct", "project_name": "SyntheticTextOpenMathInstruct", "downloads": 48, "source": "Hugging Face", "score": -0.05176419668986775, "first_commit": "2024-06-13 07:47:23", "latest_commit": "2024-06-14 07:12:50", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "hh-rlhf-12k-ja This repository provides a human preference dataset developed by LLM-jp, a collaborative project launched in Japan.", "url": "https://huggingface.co/datasets/llm-jp/hh-rlhf-12k-ja", "project_name": "hh-rlhf-12k-ja", "downloads": 48, "source": "Hugging Face", "score": -0.05176419668986775, "first_commit": "2024-02-04 21:19:53", "latest_commit": "2024-02-04 21:45:59", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "For more details & to download the rest of the dataset(paid),please refer to the link: https://www.nexdata.ai/datasets/nlu/153?", "url": "https://huggingface.co/datasets/Nexdata/Japanese-English_Parallel_Corpus_Data", "project_name": "Japanese-English_Parallel_Corpus_Data", "downloads": 48, "source": "Hugging Face", "score": -0.05176419668986775, "first_commit": "2023-11-08 10:50:47", "latest_commit": "2024-08-09 10:25:31", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Annotation and Dataset Development" ] }, { "description": "Amenokaku-Code-Instruct Update: 2023/12/27データセットに JaxTon , プロになるJava のコードデータ 180 レコードを追加しました。 ", "url": "https://huggingface.co/datasets/kunishou/amenokaku-code-instruct", "project_name": "amenokaku-code-instruct", "downloads": 48, "source": "Hugging Face", "score": -0.05176419668986775, "first_commit": "2023-10-01 01:04:50", "latest_commit": "2024-04-01 17:01:54", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Generation" ] }, { "description": "alabnii/jmedroberta-base-manbyo-wordpiece Model description This is a Japanese RoBERTa base model pre-trained on academic articles in medical sciences collected 
by Japan Science and Technology Agency (JST).", "url": "https://huggingface.co/alabnii/jmedroberta-base-manbyo-wordpiece", "project_name": "jmedroberta-base-manbyo-wordpiece", "downloads": 47, "source": "Hugging Face", "score": -0.05177129581556226, "first_commit": "2022-12-22 17:17:03", "latest_commit": "2023-03-08 01:44:36", "languages": [], "model_or_dataset": "model", "model_size": 0.109, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "nlp-waseda/gpt2-small-japanese This model is a Japanese GPT-2 model pretrained on Japanese Wikipedia and CC-100.", "url": "https://huggingface.co/nlp-waseda/gpt2-small-japanese", "project_name": "gpt2-small-japanese", "downloads": 47, "source": "Hugging Face", "score": -0.05177129581556226, "first_commit": "2022-03-30 03:34:11", "latest_commit": "2022-03-30 04:28:17", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Description This is a templated version of data from ~40 Japanese open source downstream task datasets.", "url": "https://huggingface.co/datasets/Ego/jpflan", "project_name": "jpflan", "downloads": 47, "source": "Hugging Face", "score": -0.05177129581556226, "first_commit": "2024-04-17 06:28:50", "latest_commit": "2024-04-18 00:22:26", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "fungi_indexed_mycological_papers_japanese The 大菌輪 three-line paper summaries dataset. Last updated: 2024/9/28 (through R3-12108). Languages Japanese This dataset is available in Japanese only.", "url": "https://huggingface.co/datasets/Atsushi/fungi_indexed_mycological_papers_japanese", "project_name": "fungi_indexed_mycological_papers_japanese", "downloads": 47, "source": "Hugging Face", "score": -0.05177129581556226, "first_commit": "2022-02-22 22:21:32", "latest_commit": "2024-06-04 22:41:32", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Indexing" ] }, { "description": "The url column of each record gives the source. ", "url": "https://huggingface.co/datasets/numad/yuho-text-2024", "project_name": "yuho-text-2024", "downloads": 47, "source": "Hugging Face", "score": -0.05177129581556226, "first_commit": "2024-06-14 11:17:32", "latest_commit": "2024-06-14 11:18:17", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "NikolayKozloff/gemma-2-2b-jpn-it-Q8_0-GGUF", "url": "https://huggingface.co/NikolayKozloff/gemma-2-2b-jpn-it-Q8_0-GGUF", "project_name": "gemma-2-2b-jpn-it-Q8_0-GGUF", "downloads": 46, "source": "Hugging Face", "score": -0.05177839494125677, "first_commit": "2024-10-03 09:34:39", "latest_commit": "2024-10-03 09:34:54", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Ruri: Japanese General Text Embeddings Usage First install the Sentence Transformers library: pip install -U sentence-transformers Then you can load this model and run inference.", "url": 
"https://huggingface.co/cl-nagoya/ruri-pt-large", "project_name": "ruri-pt-large", "downloads": 46, "source": "Hugging Face", "score": -0.05177839494125677, "first_commit": "2024-08-19 00:58:49", "latest_commit": "2024-08-30 00:59:26", "languages": [], "model_or_dataset": "model", "model_size": 0.337, "model_architectures": "BertModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "jaCappella corpus : Japanese a cappella vocal ensemble corpus The jaCappella corpus is a corpus of Japanese a cappella vocal ensembles.", "url": "https://huggingface.co/datasets/jaCappella/jaCappella", "project_name": "jaCappella", "downloads": 46, "source": "Hugging Face", "score": -0.05177839494125677, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "This repository contains the dataset used for the TaCo paper.", "url": "https://huggingface.co/datasets/saillab/alpaca_japanese_taco", "project_name": "alpaca_japanese_taco", "downloads": 46, "source": "Hugging Face", "score": -0.05177839494125677, "first_commit": "2024-06-04 00:51:02", "latest_commit": "2024-09-20 22:09:00", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Cross-Lingual Transfer", "Annotation and Dataset Development" ] }, { "description": "Kendamarron/jimba-wiki-instruction-calm3 grapevine-AI/CALM3-22B-Chat-GGUFのQ4_K_Mを使った合成instructionデータセットです。 ", "url": "https://huggingface.co/datasets/Kendamarron/jimba-wiki-instruction-calm3", "project_name": "jimba-wiki-instruction-calm3", "downloads": 46, "source": "Hugging Face", "score": -0.05177839494125677, "first_commit": "2024-07-09 22:18:35", "latest_commit": "2024-07-20 12:57:05", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "ShareGPT-Processed The RyokoAI/ShareGPT52K dataset, converted to Markdown and labeled with the language used.", "url": "https://huggingface.co/datasets/zetavg/ShareGPT-Processed", "project_name": "ShareGPT-Processed", "downloads": 46, "source": "Hugging Face", "score": -0.05177839494125677, "first_commit": "2023-05-16 19:50:04", "latest_commit": "2023-05-21 03:50:14", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "This is a ChatML model.", "url": "https://huggingface.co/yamatazen/HMS-Slerp-12B", "project_name": "HMS-Slerp-12B", "downloads": 45, "source": "Hugging Face", "score": -0.051785494066951286, "first_commit": "2025-04-16 08:01:00", "latest_commit": "2025-04-16 09:47:20", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "ryota39様の Tora-7B-v0.1 をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Tora-7B-v0.1-GGUF", "project_name": "Tora-7B-v0.1-GGUF", "downloads": 45, "source": "Hugging Face", "score": -0.051785494066951286, "first_commit": "2024-05-07 11:24:35", "latest_commit": "2024-06-15 03:16:21", "languages": [], "model_or_dataset": "model", "model_size": 7.24, 
"model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Local-Novel-LLM-project様の Ninja-V3 をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Ninja-V3-GGUF", "project_name": "Ninja-V3-GGUF", "downloads": 45, "source": "Hugging Face", "score": -0.051785494066951286, "first_commit": "2024-07-03 11:52:04", "latest_commit": "2024-07-03 16:59:05", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese-Roleplay-Dialogues This is a dialogue corpus collected from Japanese role-playing forum (commonly known as \"なりきりチャット(narikiri chat)\").", "url": "https://huggingface.co/datasets/OmniAICreator/Japanese-Roleplay-Dialogues", "project_name": "Japanese-Roleplay-Dialogues", "downloads": 45, "source": "Hugging Face", "score": -0.051785494066951286, "first_commit": "2024-06-08 08:41:10", "latest_commit": "2024-06-08 16:25:27", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "CoTangentは人手で作成された高品質でクリーンな100セットの日本語CoT用データセットです。 ", "url": "https://huggingface.co/datasets/sudy-super/CoTangent", "project_name": "CoTangent", "downloads": 45, "source": "Hugging Face", "score": -0.051785494066951286, "first_commit": "2023-07-04 09:15:33", "latest_commit": "2023-07-15 14:45:20", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality" ] }, { "description": "概要 NHKで定期的に放送されていた『着信御礼!", "url": "https://huggingface.co/datasets/YANS-official/ogiri-keitai", "project_name": "ogiri-keitai", "downloads": 45, "source": "Hugging Face", "score": -0.051785494066951286, "first_commit": "2024-07-20 10:11:36", "latest_commit": "2024-08-30 10:13:20", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "このデータセットは、Wikipediaデータセットの日本語データのみを抽出したparquetファイルであるrange3/wikipedia-ja-20230101より、「Category:日経平均株価」に含まれる記事に該当するレコードを抽出した作業用サンプルです。", "url": "https://huggingface.co/datasets/onewanto/sample-dataset-wikipedia-nikkei225", "project_name": "sample-dataset-wikipedia-nikkei225", "downloads": 45, "source": "Hugging Face", "score": -0.051785494066951286, "first_commit": "2025-04-17 07:16:22", "latest_commit": "2025-04-17 07:42:24", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "CT-RATE-JPN Dataset CT-RATE-JPN is a Japanese-translated version of radiology reports from the CT-RATE dataset, which contains chest CT volumes paired with corresponding radiology reports.", "url": "https://huggingface.co/datasets/YYama0/CT-RATE-JPN", "project_name": "CT-RATE-JPN", "downloads": 45, "source": "Hugging Face", "score": -0.051785494066951286, "first_commit": "2024-12-02 04:38:04", "latest_commit": "2024-12-23 06:14:10", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": 
"Dataset 5M (5121625) clean Japanese full sentence with the context.", "url": "https://huggingface.co/datasets/AhmedSSabir/Japanese-wiki-dump-sentence-dataset", "project_name": "Japanese-wiki-dump-sentence-dataset", "downloads": 45, "source": "Hugging Face", "score": -0.051785494066951286, "first_commit": "2022-06-08 11:34:04", "latest_commit": "2023-07-11 12:22:09", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Semantic Similarity", "Semantic Text Processing", "Low-Resource NLP", "Annotation and Dataset Development" ] }, { "description": "fungi_diagnostic_chars_comparison_japanese大菌輪「識別形質まとめ」データセット最終更新日 /", "url": "https://huggingface.co/datasets/Atsushi/fungi_diagnostic_chars_comparison_japanese", "project_name": "fungi_diagnostic_chars_comparison_japanese", "downloads": 45, "source": "Hugging Face", "score": -0.051785494066951286, "first_commit": "2022-02-20 10:16:30", "latest_commit": "2024-06-04 22:42:34", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining" ] }, { "description": "bert-japanese_finetuned-sentiment-analysis This model was trained from scratch on the Japanese Sentiment Polarity Dictionary dataset.", "url": "https://huggingface.co/minutillamolinara/bert-japanese_finetuned-sentiment-analysis", "project_name": "bert-japanese_finetuned-sentiment-analysis", "downloads": 44, "source": "Hugging Face", "score": -0.05179259319264579, "first_commit": "2023-03-31 02:28:09", "latest_commit": "2023-03-31 13:13:37", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Language Models", "Semantic Text Processing", "Sentiment Analysis" ] }, { "description": "GGUF conversion of NTQAI/chatntq-ja-7b-v1.0 ChatNTQ-JA-7b-v1.0 is a Japanese chat fine-tuned model built on top of the stabilityai/japanese-stablelm-base-gamma-7b, which is originally based on Mistral 7B v0.1.", "url": "https://huggingface.co/TFMC/ChatNTQ-JA-7b-v1.0-GGUF", "project_name": "ChatNTQ-JA-7b-v1.0-GGUF", "downloads": 44, "source": "Hugging Face", "score": -0.05179259319264579, "first_commit": "2024-04-03 22:42:14", "latest_commit": "2024-04-04 23:10:54", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Chat-Vector-LLaVA-v1.5-7b-JA Model Card Model detail Model type: Chat-Vector-LLaVA-v1.5-7b-JA is a vision-language model that can converse about input images in Japanese.", "url": "https://huggingface.co/toshi456/chat-vector-llava-v1.5-7b-ja", "project_name": "chat-vector-llava-v1.5-7b-ja", "downloads": 44, "source": "Hugging Face", "score": -0.05179259319264579, "first_commit": "2024-05-06 04:07:19", "latest_commit": "2024-05-06 11:33:32", "languages": [], "model_or_dataset": "model", "model_size": 7.06, "model_architectures": "LlavaLlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ascktgcc/Mistral-Nemo-Japanese-Instruct-2408-Q4_K_S-GGUF", "url": "https://huggingface.co/ascktgcc/Mistral-Nemo-Japanese-Instruct-2408-Q4_K_S-GGUF", "project_name": "Mistral-Nemo-Japanese-Instruct-2408-Q4_K_S-GGUF", "downloads": 44, "source": "Hugging Face", "score": 
-0.05179259319264579, "first_commit": "2024-09-27 02:44:01", "latest_commit": "2024-09-27 02:44:32", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Tanuki-8x8B-dpo-v1.0-GPTQ-8bit Overview A GPTQ 8-bit quantization of weblab-GENIAC/Tanuki-8x8B-dpo-v1.0, an LLM developed in the GENIAC Matsuo Lab LLM development project.", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-GPTQ-8bit", "project_name": "Tanuki-8x8B-dpo-v1.0-GPTQ-8bit", "downloads": 44, "source": "Hugging Face", "score": -0.05179259319264579, "first_commit": "2024-08-28 02:30:27", "latest_commit": "2024-09-03 09:26:02", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "TanukiForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "How to load: from datasets import load_dataset dataset = load_dataset(\"YANS-official/senryu-shashin\", split=\"train\") Overview Crawl data for 『写真川柳』 (photo senryu), the category of the 『ホームメイト川柳大賞』 contest run by Homemate Research (operated by Token Corporation) in which the prompt is provided as an image.", "url": "https://huggingface.co/datasets/YANS-official/senryu-shashin", "project_name": "senryu-shashin", "downloads": 44, "source": "Hugging Face", "score": -0.05179259319264579, "first_commit": "2024-08-28 18:50:08", "latest_commit": "2024-08-31 03:47:50", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "DataPilot/Zero_SFT_Ja_by_Mistral_Small This dataset contains high-quality synthetic prompts written in Japanese and the corresponding AI outputs.", "url": "https://huggingface.co/datasets/DataPilot/Zero_SFT_Ja_by_Mistral_Small", "project_name": "Zero_SFT_Ja_by_Mistral_Small", "downloads": 44, "source": "Hugging Face", "score": -0.05179259319264579, "first_commit": "2025-04-07 15:59:22", "latest_commit": "2025-04-07 16:01:29", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Synthetic-JP-Roleplay-Instruction-Nemotron-4 An instruction dataset of roughly 1,000 Japanese roleplay examples, created by applying the Magpie method to nvidia/Nemotron-4-340B-Instruct.", "url": "https://huggingface.co/datasets/Aratako/Synthetic-JP-Roleplay-Instruction-Nemotron-4-1k", "project_name": "Synthetic-JP-Roleplay-Instruction-Nemotron-4-1k", "downloads": 44, "source": "Hugging Face", "score": -0.05179259319264579, "first_commit": "2024-06-23 08:28:26", "latest_commit": "2024-06-23 08:42:32", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "This dataset is just a sample of Japanese Conversational Speech by Mobile Phone (paid dataset).", "url": "https://huggingface.co/datasets/Nexdata/Japanese_Conversational_Speech_by_Mobile_Phone", "project_name": "Japanese_Conversational_Speech_by_Mobile_Phone", "downloads": 44, "source": "Hugging Face", "score": -0.05179259319264579, "first_commit": "2022-09-16 10:14:35", "latest_commit": "2024-08-09 03:13:20", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents", "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "KUM-Bench: A Benchmark for Advanced Japanese Reasoning
Capabilities KUM-Bench (Kyoto University Math Entrance Exam Benchmark) is designed to evaluate advanced Japanese reasoning capabilities by leveraging mathematics entrance exam questions from Kyoto University, one of the most prestigious universities in Japan.", "url": "https://huggingface.co/datasets/Inoichan/KUM-Bench", "project_name": "KUM-Bench", "downloads": 44, "source": "Hugging Face", "score": -0.05179259319264579, "first_commit": "2025-01-13 07:18:11", "latest_commit": "2025-01-13 14:40:24", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Reasoning" ] }, { "description": "bert-base-japanese-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-base-japanese-upos", "project_name": "bert-base-japanese-upos", "downloads": 43, "source": "Hugging Face", "score": -0.05179969231834031, "first_commit": "2021-08-26 23:02:50", "latest_commit": "2022-09-18 19:43:26", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Information Retrieval", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "Leia-Swallow-7B LEIA is a training technique for autoregressive LLMs that effectively improves their performance in languages other than English by enhancing cross-lingual knowledge transfer from English to a target language.", "url": "https://huggingface.co/leia-llm/Leia-Swallow-7b", "project_name": "Leia-Swallow-7b", "downloads": 43, "source": "Hugging Face", "score": -0.05179969231834031, "first_commit": "2024-04-17 07:12:28", "latest_commit": "2024-04-17 10:29:56", "languages": [], "model_or_dataset": "model", "model_size": 6.83, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Multilinguality", "Cross-Lingual Transfer", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Llama-3-8B-Japanese-Instruct-GGUF Original Model haqishen/Llama-3-8B-Japanese-Instruct Run with GaiaNet Prompt template: llama-3-chat Context size: chat_ctx_size: 4096 Run with GaiaNet:", "url": "https://huggingface.co/gaianet/Llama-3-8B-Japanese-Instruct-GGUF", "project_name": "Llama-3-8B-Japanese-Instruct-GGUF", "downloads": 43, "source": "Hugging Face", "score": -0.05179969231834031, "first_commit": "2024-05-14 05:38:05", "latest_commit": "2024-05-16 13:44:53", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "dataset split from joujiboi/japanese-anime-speech-v2", "url": "https://huggingface.co/datasets/hhim8826/japanese-anime-speech-v2-split", "project_name": "japanese-anime-speech-v2-split", "downloads": 43, "source": "Hugging Face", "score": -0.05179969231834031, "first_commit": "2025-03-08 20:21:21", "latest_commit": "2025-03-10 10:43:15", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Stickers of Ruri (るり). Just for fun.", "url": "https://huggingface.co/datasets/LiuliFox/stickers", "project_name": "stickers", "downloads": 43, "source": "Hugging Face", "score": -0.05179969231834031, "first_commit": "2024-11-21 18:04:34",
"latest_commit": "2024-11-21 22:42:12", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "Overview This dataset is of conversations extracted from Aozora Bunko (青空文庫), which collects public-domain books in Japan, using a simple heuristic approach.", "url": "https://huggingface.co/datasets/globis-university/aozorabunko-chats", "project_name": "aozorabunko-chats", "downloads": 43, "source": "Hugging Face", "score": -0.05179969231834031, "first_commit": "2023-08-04 00:11:23", "latest_commit": "2023-10-27 13:26:00", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "概要 elyza/Llama-3-ELYZA-JP-8Bを元にchat vectorを用いて改良しAItuberに特化させました。 ", "url": "https://huggingface.co/DataPilot/Llama3-ArrowSE-8B-v0.3", "project_name": "Llama3-ArrowSE-8B-v0.3", "downloads": 42, "source": "Hugging Face", "score": -0.05180679144403482, "first_commit": "2024-07-06 15:39:54", "latest_commit": "2024-07-07 14:18:02", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "kajuma/CC-news-2024-July-October-cleanedを元に、9月、10月のニュースのみを抜き出したデータセット。 ", "url": "https://huggingface.co/datasets/ikedachin/CC-news-2024-October-cleaned-cpt-set-250127", "project_name": "CC-news-2024-October-cleaned-cpt-set-250127", "downloads": 42, "source": "Hugging Face", "score": -0.05180679144403482, "first_commit": "2025-01-27 11:31:53", "latest_commit": "2025-01-27 11:38:19", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Anime Quotes Dataset ― アニメの名言データセット🎐 Welcome to Anime Quotes Dataset Overview This dataset contains a curated collection of inspiring and memorable quotes from various anime series, sourced from the Anime Motivation website.", "url": "https://huggingface.co/datasets/mohamed-khalil/AnimeQuotes", "project_name": "AnimeQuotes", "downloads": 42, "source": "Hugging Face", "score": -0.05180679144403482, "first_commit": "2024-02-18 18:26:50", "latest_commit": "2024-02-21 15:17:52", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "recruit-jp/japanese-image-classification-evaluation-dataset Overview Developed by: Recruit Co.", "url": "https://huggingface.co/datasets/recruit-jp/japanese-image-classification-evaluation-dataset", "project_name": "japanese-image-classification-evaluation-dataset", "downloads": 42, "source": "Hugging Face", "score": -0.05180679144403482, "first_commit": "2023-12-19 09:17:24", "latest_commit": "2024-01-22 10:48:13", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Visual Data in NLP", "Information Retrieval", "Text Classification", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "dialogsum-ja このデータセットはdialogsum、CSDSなどを翻訳した日本語対話要約データセットです。 ", "url": 
"https://huggingface.co/datasets/sudy-super/dialogsum-ja", "project_name": "dialogsum-ja", "downloads": 42, "source": "Hugging Face", "score": -0.05180679144403482, "first_commit": "2023-07-15 10:16:24", "latest_commit": "2023-07-15 10:27:58", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Dialogue Systems & Conversational Agents" ] }, { "description": "nlp-waseda/bigbird-base-japanese Model description This is a Japanese BigBird base model pretrained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/nlp-waseda/bigbird-base-japanese", "project_name": "bigbird-base-japanese", "downloads": 41, "source": "Hugging Face", "score": -0.05181389056972933, "first_commit": "2023-06-03 12:51:12", "latest_commit": "2023-06-20 10:49:17", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BigBirdForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "albert-base-japanese-v1-with-japanese 日本語事前学習済みALBERTモデルですこのモデルではTokenizerにBertJapaneseTokenizerクラスを利用していますalbert-base-japanese-v1よりトークナイズ処理が楽になっています How to use ファインチューニング このモデルはPreTrainedモデルです基本的には各種タスク用にファインチューニングして使用されることを想定しています Fill-Mask for PyTorch from transformers import ( AutoModelForMaskedLM, AutoTokenizer ) tokenizer = AutoTokenizer.from_pretrained(\"ken11/albert-base-japanese-v1-with-japanese-tokenizer\")", "url": "https://huggingface.co/ken11/albert-base-japanese-v1-with-japanese-tokenizer", "project_name": "albert-base-japanese-v1-with-japanese-tokenizer", "downloads": 41, "source": "Hugging Face", "score": -0.05181389056972933, "first_commit": "2022-04-20 16:34:22", "latest_commit": "2022-04-21 02:28:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "AlbertForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "モデルの説明(English explanation is below.", "url": "https://huggingface.co/keitokei1994/swallow-3-8B-sqlcoder-2x8B-GGUF", "project_name": "swallow-3-8B-sqlcoder-2x8B-GGUF", "downloads": 41, "source": "Hugging Face", "score": -0.05181389056972933, "first_commit": "2024-07-03 11:02:45", "latest_commit": "2024-07-04 07:20:41", "languages": [], "model_or_dataset": "model", "model_size": 13.7, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "以下の条件に同意したうえで、公開されたモデル及びデータセット等(以下「本コンテンツ」)といいます)をダウンロードします。 ", "url": "https://huggingface.co/datasets/weblab-GENIAC/aya-ja-nemotron-dpo-masked", "project_name": "aya-ja-nemotron-dpo-masked", "downloads": 41, "source": "Hugging Face", "score": -0.05181389056972933, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "VTuber YouTube Channel List Dataset このデータセットは、VTuber チャンネルと VTuber でない(例:料理チャンネルなど)の YouTube チャンネルのメタデータを JSONL 形式でまとめたものです。", "url": "https://huggingface.co/datasets/ayousanz/vtuber-youtube-list-dataset", "project_name": "vtuber-youtube-list-dataset", "downloads": 41, "source": "Hugging Face", "score": -0.05181389056972933, "first_commit": "2025-02-16 15:41:14", "latest_commit": 
"2025-02-17 07:55:47", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "概要 このデータセットはglaive-aiが公開しているin-foxhoundをKUJIRAを用いて日本語に翻訳したものになります。 ", "url": "https://huggingface.co/datasets/DataPilot/in-foxhound-ja", "project_name": "in-foxhound-ja", "downloads": 41, "source": "Hugging Face", "score": -0.05181389056972933, "first_commit": "2024-09-13 04:27:59", "latest_commit": "2024-09-13 04:32:51", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "Japanese-Law-Translation Dataset Summary", "url": "https://huggingface.co/datasets/Hoshikuzu/Japanese-Law-Translation", "project_name": "Japanese-Law-Translation", "downloads": 41, "source": "Hugging Face", "score": -0.05181389056972933, "first_commit": "2024-08-24 14:43:16", "latest_commit": "2024-08-25 13:26:11", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "日本語Wikipedia入力誤りデータセット 概要 これは京都大学 言語メディア研究室によって公開されているデータセットをHuggingFaceで使用できるよう変換したものです.", "url": "https://huggingface.co/datasets/JunSotohigashi/JapaneseWikipediaTypoDataset", "project_name": "JapaneseWikipediaTypoDataset", "downloads": 41, "source": "Hugging Face", "score": -0.05181389056972933, "first_commit": "2025-01-22 03:40:19", "latest_commit": "2025-01-22 08:18:40", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Hibernates-JP-1.3b-Max This is a Japanese vision-language model based on LLaVA architecture with 1.3B parameters.", "url": "https://huggingface.co/Hibernates/Hibernates-JP-1.3b-Max", "project_name": "Hibernates-JP-1.3b-Max", "downloads": 40, "source": "Hugging Face", "score": -0.05182098969542384, "first_commit": "2025-02-09 11:41:29", "latest_commit": "2025-02-09 11:58:49", "languages": [], "model_or_dataset": "model", "model_size": 2.1, "model_architectures": "HibernatesGpt2ForCausalLM", "multi_labels": [ "Visual Data in NLP", "Language Models", "Multimodality" ] }, { "description": "Aerner LM-v2 事前学習から全部日本語で学習させたモデルのバージョン2です。 ", "url": "https://huggingface.co/aerner/lm-v2", "project_name": "lm-v2", "downloads": 40, "source": "Hugging Face", "score": -0.05182098969542384, "first_commit": "2023-06-09 15:19:12", "latest_commit": "2023-06-09 16:08:47", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "OpenLlamaForCausalLM", "multi_labels": [] }, { "description": "rinna/nekomata-7b-instruction-gguf Overview The model is the GGUF version of rinna/nekomata-7b-instruction.", "url": "https://huggingface.co/rinna/nekomata-7b-instruction-gguf", "project_name": "nekomata-7b-instruction-gguf", "downloads": 40, "source": "Hugging Face", "score": -0.05182098969542384, "first_commit": "2023-12-19 08:11:08", "latest_commit": "2024-07-20 08:38:34", "languages": [], "model_or_dataset": "model", "model_size": 7.72, "model_architectures": null, "multi_labels": [ "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese-Novel-Reward-sarashina2.1-1b このモデルはsbintuitions/sarashina2.1-1bをファインチューニングして作成された日本語小説の品質評価のためのRewardモデルです。 ", 
"url": "https://huggingface.co/Aratako/Japanese-Novel-Reward-sarashina2.1-1b", "project_name": "Japanese-Novel-Reward-sarashina2.1-1b", "downloads": 40, "source": "Hugging Face", "score": -0.05182098969542384, "first_commit": "2025-02-25 15:03:46", "latest_commit": "2025-03-04 15:24:43", "languages": [], "model_or_dataset": "model", "model_size": 1.22, "model_architectures": "LlamaForSequenceClassification", "multi_labels": [] }, { "description": "TinySlime-1.1B-Chat-v1.0 TinySlime は日本語に特化した小規模言語モデルです。 ", "url": "https://huggingface.co/2121-8/TinySlime-1.1B-Chat-v1.0", "project_name": "TinySlime-1.1B-Chat-v1.0", "downloads": 40, "source": "Hugging Face", "score": -0.05182098969542384, "first_commit": "2024-07-02 03:34:30", "latest_commit": "2024-07-02 08:53:11", "languages": [], "model_or_dataset": "model", "model_size": 1.1, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "LLMChat 概要 GENIAC 松尾研 LLM開発プロジェクトで開発したモデルを人手評価するために構築したLLMChatというシステムで収集された質問とLLMの回答、及び人手評価のデータです。 ", "url": "https://huggingface.co/datasets/team-hatakeyama-phase2/LLMChat", "project_name": "LLMChat", "downloads": 40, "source": "Hugging Face", "score": -0.05182098969542384, "first_commit": "2024-08-25 10:27:23", "latest_commit": "2024-08-28 16:01:48", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "読み込み方 from datasets import load_dataset dataset = load_dataset(\"YANS-official/bokete-ogiri-test\", split=\"test\") 概要 大喜利投稿サイトBoketeのクロールデータです。", "url": "https://huggingface.co/datasets/YANS-official/ogiri-test-with-references", "project_name": "ogiri-test-with-references", "downloads": 40, "source": "Hugging Face", "score": -0.05182098969542384, "first_commit": "2024-08-18 02:53:04", "latest_commit": "2024-09-02 18:37:28", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Text Generation", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "SakuraLLM Sakura: SFT And RLHF models using Knowledge of Universal Character and Relationship Attributes for Japanese to Chinese Translation in Light Novel & Galgame Domain.", "url": "https://huggingface.co/sakuraumi/Sakura-13B-Galgame", "project_name": "Sakura-13B-Galgame", "downloads": 39, "source": "Hugging Face", "score": -0.05182808882111835, "first_commit": "2023-08-26 16:28:53", "latest_commit": "2024-11-23 09:00:48", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BaichuanForCausalLM", "multi_labels": [] }, { "description": "AXCXEPT/EZO2.5-gemma-3-12b-it-Preview Model Details 昨今登場したLLM自身の力を自力で向上させる「GRPO」や「PPO」の概念を、 弊社で開発した「EZO」というトレーニング手法にミックスすることで、 3,000件のデータセット、2時間×H200×8台のトレーニングで、Japanese MT Benchおよび、Elyza Tasks100におけるベースモデルの日本語性能を向上させることに成功したモデルです。 ", "url": "https://huggingface.co/AXCXEPT/EZO2.5-gemma-3-12b-it-Preview", "project_name": "EZO2.5-gemma-3-12b-it-Preview", "downloads": 39, "source": "Hugging Face", "score": -0.05182808882111835, "first_commit": "2025-04-14 14:07:18", "latest_commit": "2025-04-15 23:49:47", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": "Gemma3ForConditionalGeneration", "multi_labels": [] }, { "description": "About static quants of 
https://huggingface.co/owner203/japanese-llama-3-8b-instruct-v2 weighted/imatrix quants are available at https://huggingface.co/mradermacher/japanese-llama-3-8b-instruct-v2-i1-GGUF Usage", "url": "https://huggingface.co/mradermacher/japanese-llama-3-8b-instruct-v2-GGUF", "project_name": "japanese-llama-3-8b-instruct-v2-GGUF", "downloads": 39, "source": "Hugging Face", "score": -0.05182808882111835, "first_commit": "2025-01-13 02:59:42", "latest_commit": "2025-01-13 04:07:32", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "📄 License Modified CreativeML OpenRAIL-M license: Use the model without crediting the creator; Sell images they generate; Run on services that generate images for money; Share merges using this model; Sell this model or merges using this model; Have different permissions when sharing merges 🖼️ Examples (*works generated by other people are displayed with their permission", "url": "https://huggingface.co/natsusakiyomi/KaedeMix", "project_name": "KaedeMix", "downloads": 39, "source": "Hugging Face", "score": -0.05182808882111835, "first_commit": "2023-05-13 05:57:13", "latest_commit": "2023-09-25 11:53:04", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Leia-Swallow-13B LEIA is a training technique for autoregressive LLMs that effectively improves their performance in languages other than English by enhancing cross-lingual knowledge transfer from English to a target language.", "url": "https://huggingface.co/leia-llm/Leia-Swallow-13b", "project_name": "Leia-Swallow-13b", "downloads": 39, "source": "Hugging Face", "score": -0.05182808882111835, "first_commit": "2024-04-17 07:32:11", "latest_commit": "2024-04-18 05:21:10", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Multilinguality", "Cross-Lingual Transfer", "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Overview This model is based on rinna's [rinna/llama-3-youko-8b], fine-tuned using LoRA on a small number of parallel sentences from English to Japanese.", "url": "https://huggingface.co/lyu-boxuan/llama-3-youko-8b-En-Ja-MT-LoRA", "project_name": "llama-3-youko-8b-En-Ja-MT-LoRA", "downloads": 39, "source": "Hugging Face", "score": -0.05182808882111835, "first_commit": "2024-05-10 14:33:57", "latest_commit": "2024-05-21 14:54:46", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "This is a slightly different version of kunishou/hh-rlhf-49k-ja without ng_translation == 1 examples.", "url": "https://huggingface.co/datasets/fujiki/japanese_hh-rlhf-49k", "project_name": "japanese_hh-rlhf-49k", "downloads": 39, "source": "Hugging Face", "score": -0.05182808882111835, "first_commit": "2023-05-28 05:55:53", "latest_commit": "2023-05-28 06:08:04", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [
"Multilinguality", "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "This dataset contains document-length Japanese-English parallel texts from various sources.", "url": "https://huggingface.co/datasets/NilanE/SmallParallelDocs-Ja_En-6k", "project_name": "SmallParallelDocs-Ja_En-6k", "downloads": 39, "source": "Hugging Face", "score": -0.05182808882111835, "first_commit": "2024-03-04 21:04:01", "latest_commit": "2024-03-05 03:48:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Annotation and Dataset Development" ] }, { "description": "自動生成のマルチターンデータセット オープンなデータソースから、MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-GGUFを使ってQ&Aを自動生成したものです。 関連コード 一部の計算には東京工業大学のスーパーコンピュータTSUBAME4.0を利用しました。 データソース はじめの質問(q1)を、種々のデータソースから収集しました。その後のやりとりはすべて、Mixtralが生成しました。質問文については、元データのライセンスに準拠します。 oasst2-33k-ja apache 2.0 databricks-dolly-15k-ja cc-by-sa-3.0 minnade CC0 cyberagent/chatbot-arena-ja-calm2-7b-chat-experimental cc-by-4.0", "url": "https://huggingface.co/datasets/kanhatakeyama/AutoMultiTurnByMixtral8x22b", "project_name": "AutoMultiTurnByMixtral8x22b", "downloads": 39, "source": "Hugging Face", "score": -0.05182808882111835, "first_commit": "2024-05-18 00:43:54", "latest_commit": "2024-05-22 20:25:15", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Dialogue Systems & Conversational Agents" ] }, { "description": "Synthetic-JP-EN-Coding-Dataset-801k-50k Aratako/Synthetic-JP-EN-Coding-Dataset-801kから英語部分5万件を抽出したデータセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Synthetic-JP-EN-Coding-Dataset-801k-50k", "project_name": "Synthetic-JP-EN-Coding-Dataset-801k-50k", "downloads": 39, "source": "Hugging Face", "score": -0.05182808882111835, "first_commit": "2024-11-22 03:21:08", "latest_commit": "2024-12-10 16:57:38", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "VTuber Overview Dataset (GPT-4o Search Preview) 本データセットは,GPT-4o Search Preview を活用して収集した VTuber に関する活動内容や特徴,コラボ履歴などが自然言語でまとめられておいます。 ", "url": "https://huggingface.co/datasets/Atotti/VTuber-overview", "project_name": "VTuber-overview", "downloads": 39, "source": "Hugging Face", "score": -0.05182808882111835, "first_commit": "2025-03-16 11:53:35", "latest_commit": "2025-03-16 14:53:34", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Retrieval", "Language Models", "Semantic Text Processing" ] }, { "description": "shunk031/JDocQAのtrain splitに含まれるPDFデータを画像化し、NDLOCRでOCRしたテキストとペアにしたデータセットです。", "url": "https://huggingface.co/datasets/oshizo/japanese-text-image-retrieval-train", "project_name": "japanese-text-image-retrieval-train", "downloads": 39, "source": "Hugging Face", "score": -0.05182808882111835, "first_commit": "2025-01-25 15:18:38", "latest_commit": "2025-01-26 00:13:12", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "zenz-v2.5-dataset zenz-v2.5-datasetはかな漢字変換タスクに特化した条件付き言語モデル「zenz-v2.5」シリーズの学習を目的として構築したデータセットです。 ", "url": "https://huggingface.co/datasets/Miwa-Keita/zenz-v2.5-dataset", 
"project_name": "zenz-v2.5-dataset", "downloads": 39, "source": "Hugging Face", "score": -0.05182808882111835, "first_commit": "2025-01-13 09:57:07", "latest_commit": "2025-01-17 06:45:33", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "DeepSeek-R1-Distill-Qwen-32B-Japanese GGUF Model Description", "url": "https://huggingface.co/aplulu/cyberagent-DeepSeek-R1-Distill-Qwen-32B-Japanese-GGUF", "project_name": "cyberagent-DeepSeek-R1-Distill-Qwen-32B-Japanese-GGUF", "downloads": 38, "source": "Hugging Face", "score": -0.05183518794681286, "first_commit": "2025-01-28 16:23:44", "latest_commit": "2025-01-29 01:32:12", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Language Models", "Green & Sustainable NLP", "Semantic Text Processing" ] }, { "description": "Model Card for Japanese character-level GPT-2 Small Model description This is a Japanese character-level GPT-2 Small (90M parameters) language model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/ku-nlp/gpt2-small-japanese-char", "project_name": "gpt2-small-japanese-char", "downloads": 38, "source": "Hugging Face", "score": -0.05183518794681286, "first_commit": "2023-04-18 08:24:55", "latest_commit": "2023-05-08 10:08:13", "languages": [], "model_or_dataset": "model", "model_size": 0.10300000000000001, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "GGUF版はこちらascktgcc/Mistral-nemo-ja-rp-v0.2-GGUF 概要 Mistral-nemoをEPR用途向けにファインチューニングしたモデルです 日本語を含めたデータセットを使用してファインチューニングしたためmagnumのようなモデルよりも日本語力が上がっているはず Mistral-NemoベースなのでTemperatureは0.3を基準に調整することを推奨 system promptに日本語で出力する旨を記載することで英語が混じる問題を抑制できます v0.1からの変更点 データセットの追加 データセットのsystem promptに<データセットの言語>で出力する指示を追加 エポックを9倍に増加 使用させていただいたデータセット kalomaze/Opus_Instruct_25k Nopm/Opus_WritingStruct anthracite-org/kalo-opus-instruct-22k-no-refusal Aratako/Synthetic-Japanese-Roleplay-NSFW-Claude-3.5s-15.3k-formatted ", "url": "https://huggingface.co/ascktgcc/Mistral-nemo-ja-rp-v0.2", "project_name": "Mistral-nemo-ja-rp-v0.2", "downloads": 38, "source": "Hugging Face", "score": -0.05183518794681286, "first_commit": "2024-10-21 16:07:14", "latest_commit": "2024-10-28 10:58:54", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "umiyuki様の Japanese-Chat-Umievo-itr004-7b をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Japanese-Chat-Umievo-itr004-7b-GGUF", "project_name": "Japanese-Chat-Umievo-itr004-7b-GGUF", "downloads": 38, "source": "Hugging Face", "score": -0.05183518794681286, "first_commit": "2024-05-13 16:28:41", "latest_commit": "2024-05-13 23:33:49", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Qwen2.5-ja-zh", "url": "https://huggingface.co/hakutaku/qwen2.5-ja-zh", "project_name": "qwen2.5-ja-zh", "downloads": 38, "source": "Hugging Face", "score": -0.05183518794681286, "first_commit": "2024-09-19 14:15:49", "latest_commit": 
"2024-09-20 07:45:25", "languages": [], "model_or_dataset": "model", "model_size": 7.62, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ltgbert-base-japanese-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/ltgbert-base-japanese-ud-goeswith", "project_name": "ltgbert-base-japanese-ud-goeswith", "downloads": 38, "source": "Hugging Face", "score": -0.05183518794681286, "first_commit": "2024-09-13 16:29:53", "latest_commit": "2024-09-14 07:34:21", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LtgbertForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "japanese music emotion Music2Emotionを使って主に日本の音楽の感情分析を行ったデータセット 分析されたデータは以下のようなフォーマットのjsonlになっています。 ", "url": "https://huggingface.co/datasets/ayousanz/japanese-music-emotion", "project_name": "japanese-music-emotion", "downloads": 38, "source": "Hugging Face", "score": -0.05183518794681286, "first_commit": "2025-02-24 03:29:25", "latest_commit": "2025-02-25 16:30:18", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Speech & Audio in NLP", "Multimodality" ] }, { "description": "長文用のinstructionデータセットです。 ", "url": "https://huggingface.co/datasets/aixsatoshi/Longcontext-aozora-instruction", "project_name": "Longcontext-aozora-instruction", "downloads": 38, "source": "Hugging Face", "score": -0.05183518794681286, "first_commit": "2024-03-29 11:35:09", "latest_commit": "2024-03-30 11:44:00", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Model Card for llm-jp-clip-vit-base-patch16 Model Details Japanese CLIP model trained with OpenCLIP on relaion2B-en-research-safe-japanese-translation, a Japanese translation of the English subset of ReLAION-5B (https://huggingface.co/datasets/laion/relaion2B-en-research-safe),", "url": "https://huggingface.co/llm-jp/llm-jp-clip-vit-base-patch16", "project_name": "llm-jp-clip-vit-base-patch16", "downloads": 37, "source": "Hugging Face", "score": -0.051842287072507376, "first_commit": "2024-12-17 11:15:55", "latest_commit": "2025-02-14 13:41:45", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality" ] }, { "description": "275.86Mのmixtralを日本語データセットでpretrainingしたものです sample from transformers import AutoTokenizer, AutoModelForCausalLM model = AutoModelForCausalLM.from_pretrained(\"if001/tiny_mixtral_ja\")", "url": "https://huggingface.co/if001/tiny_mixtral_ja", "project_name": "tiny_mixtral_ja", "downloads": 37, "source": "Hugging Face", "score": -0.051842287072507376, "first_commit": "2024-01-22 15:02:21", "latest_commit": "2024-01-23 00:42:05", "languages": [], "model_or_dataset": "model", "model_size": 0.276, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Responsible & Trustworthy NLP", "Syntactic Text Processing", "Language Models", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "X(Twitter) アカウント ぜひ遊びにきてね。 ", "url": "https://huggingface.co/RikkaBotan/style_bert_vits2_jp_extra_cool_original", "project_name": "style_bert_vits2_jp_extra_cool_original", "downloads": 37, 
"source": "Hugging Face", "score": -0.051842287072507376, "first_commit": "2024-04-25 03:01:53", "latest_commit": "2024-05-06 21:30:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "llm-jp-3-172b-alpha1", "url": "https://huggingface.co/llm-jp/llm-jp-3-172b-alpha1", "project_name": "llm-jp-3-172b-alpha1", "downloads": 37, "source": "Hugging Face", "score": -0.051842287072507376, "first_commit": "2024-09-29 16:08:50", "latest_commit": "2024-10-04 03:22:47", "languages": [], "model_or_dataset": "model", "model_size": 172.0, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese Prompt of GuanacoDataset extracted using langdetect.", "url": "https://huggingface.co/datasets/Aruno/guanaco_jp", "project_name": "guanaco_jp", "downloads": 37, "source": "Hugging Face", "score": -0.051842287072507376, "first_commit": "2023-04-24 03:07:04", "latest_commit": "2023-04-24 03:45:26", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Low-Resource NLP" ] }, { "description": "Synthetic-JP-EN-Translation-Dataset-Magpie-Nemotron-4-20k Magpieの手法をnvidia/Nemotron-4-340B-Instructに対して適用し作成した、20000件の日⇔英翻訳データセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Synthetic-JP-EN-Translation-Dataset-Magpie-Nemotron-4-20k", "project_name": "Synthetic-JP-EN-Translation-Dataset-Magpie-Nemotron-4-20k", "downloads": 37, "source": "Hugging Face", "score": -0.051842287072507376, "first_commit": "2024-07-07 11:08:34", "latest_commit": "2024-07-07 11:13:47", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "Tengentoppa corpus for sft (Combined Japanese Instruction Dataset) 概要 このデータセットは、日本語の instruction-following データセット16個を統合して作成された大規模な教師あり学習用データセットです。", "url": "https://huggingface.co/datasets/DeL-TaiseiOzaki/Tengentoppa-sft-v1.0", "project_name": "Tengentoppa-sft-v1.0", "downloads": 37, "source": "Hugging Face", "score": -0.051842287072507376, "first_commit": "2024-11-24 01:20:50", "latest_commit": "2024-12-10 07:36:22", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "JP Voice-Text Dataset for", "url": "https://huggingface.co/datasets/deepghs/fgo_voices_jp", "project_name": "fgo_voices_jp", "downloads": 37, "source": "Hugging Face", "score": -0.051842287072507376, "first_commit": "2024-08-28 08:56:04", "latest_commit": "2024-08-28 09:14:22", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Description This is a collection of raw data from ~40 Japanese open source downstream task datasets.", "url": "https://huggingface.co/datasets/Ego/jpflan-raw", "project_name": "jpflan-raw", "downloads": 37, "source": "Hugging Face", "score": -0.051842287072507376, "first_commit": "2024-04-01 06:11:19", "latest_commit": "2024-04-03 00:25:15", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, 
"multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Dataset Summary 53,640 Japanese tweets with annotation if a tweet is related to COVID-19 or not.", "url": "https://huggingface.co/datasets/community-datasets/covid_tweets_japanese", "project_name": "covid_tweets_japanese", "downloads": 37, "source": "Hugging Face", "score": -0.051842287072507376, "first_commit": "2022-01-25 16:35:12", "latest_commit": "2024-06-24 11:21:23", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Text Classification", "Annotation and Dataset Development" ] }, { "description": "mbpp-ja", "url": "https://huggingface.co/datasets/llm-jp/mbpp-ja", "project_name": "mbpp-ja", "downloads": 37, "source": "Hugging Face", "score": -0.051842287072507376, "first_commit": "2024-04-19 00:26:56", "latest_commit": "2024-04-20 06:26:51", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "For the English version, please click here. ", "url": "https://huggingface.co/datasets/sakusakumura/databricks-dolly-15k-ja-scored", "project_name": "databricks-dolly-15k-ja-scored", "downloads": 37, "source": "Hugging Face", "score": -0.051842287072507376, "first_commit": "2023-06-27 09:14:41", "latest_commit": "2023-06-27 09:18:39", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "kajuma/CC-news-2024-July-October-cleanedを元に、9月、10月のニュースのみを抜き出したデータセット。 ", "url": "https://huggingface.co/datasets/ikedachin/CC-news-2024-October-cleaned-sft-250127", "project_name": "CC-news-2024-October-cleaned-sft-250127", "downloads": 37, "source": "Hugging Face", "score": -0.051842287072507376, "first_commit": "2025-01-27 11:31:53", "latest_commit": "2025-01-27 11:38:19", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "合成日本語指示データセット 概要 このデータセットは、大規模言語モデル(Qwen2.5-32B-instruct)", "url": "https://huggingface.co/datasets/DeL-TaiseiOzaki/magpie-qwen2.5-32b-reasoning-100k", "project_name": "magpie-qwen2.5-32b-reasoning-100k", "downloads": 37, "source": "Hugging Face", "score": -0.051842287072507376, "first_commit": "2024-11-03 07:18:52", "latest_commit": "2024-11-03 07:34:14", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Canary-TTS-150M llm-jp/llm-jp-3-150m-instruct3 をベースに学習したTTSモデルです。 ", "url": "https://huggingface.co/2121-8/canary-tts-150m", "project_name": "canary-tts-150m", "downloads": 36, "source": "Hugging Face", "score": -0.05184938619820188, "first_commit": "2025-04-22 04:10:31", "latest_commit": "2025-04-23 07:00:03", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [] }, { "description": "zenz-v1 zenz-v1はGPT-2アーキテクチャに基づくかな漢字変換タスクに特化した言語モデルです。", "url": "https://huggingface.co/Miwa-Keita/zenz-v1", "project_name": "zenz-v1", "downloads": 36, "source": "Hugging Face", 
"score": -0.05184938619820188, "first_commit": "2024-05-12 15:48:46", "latest_commit": "2024-05-13 16:34:02", "languages": [], "model_or_dataset": "model", "model_size": 0.09509999999999999, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Shisa V2 Shisa V2 is a family of bilingual Japanese and English (JA/EN)", "url": "https://huggingface.co/shisa-ai/shisa-v2-llama3.1-8b", "project_name": "shisa-v2-llama3.1-8b", "downloads": 36, "source": "Hugging Face", "score": -0.05184938619820188, "first_commit": "2025-04-12 13:15:18", "latest_commit": "2025-04-16 13:26:11", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Multilinguality", "Language Models" ] }, { "description": "Summary This is a text classifier for assigning a JLPT level.", "url": "https://huggingface.co/bennexx/cl-tohoku-bert-base-japanese-v3-jlpt-classifier", "project_name": "cl-tohoku-bert-base-japanese-v3-jlpt-classifier", "downloads": 36, "source": "Hugging Face", "score": -0.05184938619820188, "first_commit": "2024-01-19 00:32:15", "latest_commit": "2024-07-10 13:41:08", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Information Extraction & Text Mining", "Information Retrieval", "Text Classification" ] }, { "description": "モデルの概要 line-corporation/japanese-large-lm-1.7bのベースモデルに対し,sftによるfull instruction tuningを行いました.", "url": "https://huggingface.co/ToPo-ToPo/line-japanese-large-lm-1.7b-kunishou-databricks-dolly-15k-ja-full-instruction-sft", "project_name": "line-japanese-large-lm-1.7b-kunishou-databricks-dolly-15k-ja-full-instruction-sft", "downloads": 36, "source": "Hugging Face", "score": -0.05184938619820188, "first_commit": "2024-01-20 12:35:16", "latest_commit": "2024-01-20 12:46:21", "languages": [], "model_or_dataset": "model", "model_size": 1.65, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "X(Twitter) アカウント ぜひ遊びにきてね。 ", "url": "https://huggingface.co/RikkaBotan/style_bert_vits2_jp_extra_sweet_original", "project_name": "style_bert_vits2_jp_extra_sweet_original", "downloads": 36, "source": "Hugging Face", "score": -0.05184938619820188, "first_commit": "2024-04-25 03:01:28", "latest_commit": "2024-05-06 21:29:11", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "「LLM-jp-3 172B beta1」利用規約 この利用規約(以下「本規約」といいます)は、大学共同利用機関法人 情報・システム研究機構 国立情報学研究所(以下「提供者」といいます)による開発の成果物として公開する大規模言語モデル「LLM-jp-3 172B beta1」(以下「本プログラム」といいます)の利用に関する条件を定めるものです。", "url": "https://huggingface.co/llm-jp/llm-jp-3-172b-beta1-instruct", "project_name": "llm-jp-3-172b-beta1-instruct", "downloads": 36, "source": "Hugging Face", "score": -0.05184938619820188, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 172.0, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "NVIDIA が公開している SteerLM 向けのトライアルデータセット HelpSteerを日本語に自動翻訳したデータセットになります。", "url": "https://huggingface.co/datasets/kunishou/HelpSteer-35k-ja", "project_name": "HelpSteer-35k-ja", "downloads": 36, "source": "Hugging Face", "score": 
-0.05184938619820188, "first_commit": "2024-03-02 16:45:19", "latest_commit": "2024-03-03 10:10:54", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "Dataset Details Dataset Type: Japanese LLaVA Pretrain is a localized version of the original LLaVA Pretrain dataset.", "url": "https://huggingface.co/datasets/turing-motors/LLaVA-Pretrain-JA", "project_name": "LLaVA-Pretrain-JA", "downloads": 36, "source": "Hugging Face", "score": -0.05184938619820188, "first_commit": "2024-04-10 05:07:24", "latest_commit": "2024-04-12 09:15:37", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Annotation and Dataset Development" ] }, { "description": "Automatically generated Q&A: Q&A pairs generated automatically from the data sources using MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-GGUF, generated on the basis of Common Crawl.", "url": "https://huggingface.co/datasets/hatakeyama-llm-team/AutoGeneratedJapaneseQA-CC", "project_name": "AutoGeneratedJapaneseQA-CC", "downloads": 36, "source": "Hugging Face", "score": -0.05184938619820188, "first_commit": "2024-05-18 03:55:41", "latest_commit": "2024-05-19 09:25:43", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "The dataset used in the exercise of Lecture 5, 'SFT', of the LLM course 2024 hosted by the Matsuo-Iwasawa Lab at the University of Tokyo.", "url": "https://huggingface.co/datasets/watashihakobashi/ogiri", "project_name": "ogiri", "downloads": 36, "source": "Hugging Face", "score": -0.05184938619820188, "first_commit": "2024-07-19 20:46:13", "latest_commit": "2024-09-24 20:41:22", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "bluemoon-fandom-1-1-rp-jp-translated A subset of Squish42/bluemoon-fandom-1-1-rp-cleaned translated to Japanese using command-r-08-2024.", "url": "https://huggingface.co/datasets/joujiboi/bluemoon-fandom-1-1-rp-jp-translated", "project_name": "bluemoon-fandom-1-1-rp-jp-translated", "downloads": 36, "source": "Hugging Face", "score": -0.05184938619820188, "first_commit": "2024-09-02 12:56:09", "latest_commit": "2024-09-09 13:20:16", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "A three-line summarization dataset built from the Livedoor News corpus.", "url": "https://huggingface.co/datasets/waddledee/three_line_summarization_for_japanese_news_articles", "project_name": "three_line_summarization_for_japanese_news_articles", "downloads": 36, "source": "Hugging Face", "score": -0.05184938619820188, "first_commit": "2024-04-15 07:34:42", "latest_commit": "2024-04-15 08:09:17", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "CommonCatalog CC-BY Extension This repository extends CommonCatalog CC-BY with additional information.", "url": "https://huggingface.co/datasets/alfredplpl/commoncatalog-cc-by-ext", "project_name": "commoncatalog-cc-by-ext", "downloads": 36, "source": "Hugging Face", "score": -0.05184938619820188, "first_commit": "2024-05-23 00:54:43", "latest_commit": "2024-06-01 01:50:33", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null,
"multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Dataset origin: https://jibiki.fr/data/ Description Les buts du projet Jibiki.fr sont de construire de manière collaborative un dictionnaire français-japonais de qualité et à large couverture ainsi qu'un corpus bilingue aligné.", "url": "https://huggingface.co/datasets/FrancophonIA/Jibiki_fr_ja", "project_name": "Jibiki_fr_ja", "downloads": 36, "source": "Hugging Face", "score": -0.05184938619820188, "first_commit": "2024-10-31 21:32:04", "latest_commit": "2024-10-31 21:43:05", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ku-nlp/roberta-base-japanese-char-wwm Model description This is a Japanese RoBERTa base model pre-trained on Japanese Wikipedia and the Japanese portion of CC-100.", "url": "https://huggingface.co/ku-nlp/roberta-base-japanese-char-wwm", "project_name": "roberta-base-japanese-char-wwm", "downloads": 35, "source": "Hugging Face", "score": -0.0518564853238964, "first_commit": "2022-09-20 05:07:34", "latest_commit": "2023-03-20 08:05:45", "languages": [], "model_or_dataset": "model", "model_size": 0.1, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "(English part follows Japanese one.", "url": "https://huggingface.co/tohoku-nlp/tohokunlp-bert-500m-sq8192-alpha", "project_name": "tohokunlp-bert-500m-sq8192-alpha", "downloads": 35, "source": "Hugging Face", "score": -0.0518564853238964, "first_commit": "2024-11-29 05:24:05", "latest_commit": "2024-12-01 07:39:22", "languages": [], "model_or_dataset": "model", "model_size": 0.581, "model_architectures": "LlamaEncForMaskedLM", "multi_labels": [ "Multilinguality", "Language Models", "Semantic Text Processing" ] }, { "description": "We initialize SPLADE-japanese from tohoku-nlp/bert-base-japanese-v2.", "url": "https://huggingface.co/aken12/splade-japanese", "project_name": "splade-japanese", "downloads": 35, "source": "Hugging Face", "score": -0.0518564853238964, "first_commit": "2024-03-03 09:01:57", "latest_commit": "2024-03-11 03:56:10", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese Stable Diffusion Pokemon Model Card Stable-Diffusion-Pokemon-ja is a Japanese-specific latent text-to-image diffusion model capable of generating Pokemon images given any text input.", "url": "https://huggingface.co/svjack/Stable-Diffusion-Pokemon-ja", "project_name": "Stable-Diffusion-Pokemon-ja", "downloads": 35, "source": "Hugging Face", "score": -0.0518564853238964, "first_commit": "2022-10-30 08:19:13", "latest_commit": "2023-05-16 09:23:49", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "はじめに なんか日本語が話せる商用利用可能なAIです。", "url": "https://huggingface.co/alfredplpl/gemma-2b-it-ja-poc-2", "project_name": "gemma-2b-it-ja-poc-2", "downloads": 35, "source": "Hugging Face", "score": -0.0518564853238964, "first_commit": "2024-03-05 12:17:24", "latest_commit": "2024-03-06 09:21:13", 
"languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Windowsの方はggml-japanese-gpt2の実行ファイルで動くと思います。 ", "url": "https://huggingface.co/datasets/inu-ai/ggml-japanese-gpt2", "project_name": "ggml-japanese-gpt2", "downloads": 35, "source": "Hugging Face", "score": -0.0518564853238964, "first_commit": "2023-04-14 10:51:10", "latest_commit": "2023-04-14 18:11:40", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "データセットについて オープンソースLLMの出力を人手でチェック・修正したinstructionにSwallow-MXでoutputを生成したデータセットです。 ", "url": "https://huggingface.co/datasets/Kendamarron/pret-a-porter-instruction-v0.1", "project_name": "pret-a-porter-instruction-v0.1", "downloads": 35, "source": "Hugging Face", "score": -0.0518564853238964, "first_commit": "2024-03-26 13:08:14", "latest_commit": "2024-04-01 04:30:44", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "AnswerCarefully Dataset 利用規約 利用規約 本データセットは、日本語および他の言語のLLMの安全性を向上させるという目的のため、商用利用も含め公開しています。 ", "url": "https://huggingface.co/datasets/llm-jp/ac-self-inst", "project_name": "ac-self-inst", "downloads": 35, "source": "Hugging Face", "score": -0.0518564853238964, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "deberta-base-japanese-wikipedia Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-wikipedia", "project_name": "deberta-base-japanese-wikipedia", "downloads": 34, "source": "Hugging Face", "score": -0.05186358444959091, "first_commit": "2022-06-25 03:46:58", "latest_commit": "2023-01-27 17:51:51", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Semantic Text Processing" ] }, { "description": "Whisper Large V3 Japanese Phone Accent", "url": "https://huggingface.co/AkitoP/whisper-large-v3-japense-phone_accent", "project_name": "whisper-large-v3-japense-phone_accent", "downloads": 34, "source": "Hugging Face", "score": -0.05186358444959091, "first_commit": "2024-10-15 21:26:03", "latest_commit": "2024-10-15 23:22:54", "languages": [], "model_or_dataset": "model", "model_size": 0.809, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [ "Speech & Audio in NLP", "Multimodality" ] }, { "description": "SambaLingo-Japanese-Chat SambaLingo-Japanese-Chat is a human aligned chat model trained in Japanese and English.", "url": "https://huggingface.co/LoneStriker/SambaLingo-Japanese-Chat-GGUF", "project_name": "SambaLingo-Japanese-Chat-GGUF", "downloads": 34, "source": "Hugging Face", "score": -0.05186358444959091, "first_commit": "2024-03-07 06:38:01", "latest_commit": "2024-03-07 06:48:27", "languages": [], "model_or_dataset": "model", "model_size": 6.95, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/nold/Orion-14B-Base-GGUF", 
"project_name": "Orion-14B-Base-GGUF", "downloads": 34, "source": "Hugging Face", "score": -0.05186358444959091, "first_commit": "2024-03-07 14:56:51", "latest_commit": "2024-03-07 19:33:53", "languages": [], "model_or_dataset": "model", "model_size": 14.5, "model_architectures": null, "multi_labels": [ "Multilinguality", "Language Models" ] }, { "description": "llm-jp-1.3b-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/llm-jp-1.3b-upos", "project_name": "llm-jp-1.3b-upos", "downloads": 34, "source": "Hugging Face", "score": -0.05186358444959091, "first_commit": "2024-08-29 05:15:49", "latest_commit": "2024-08-29 14:30:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "The images were sourced from https://huggingface.co/datasets/ThePioneer/japanese-photos.", "url": "https://huggingface.co/datasets/llm-jp/japanese-photos-conversation", "project_name": "japanese-photos-conversation", "downloads": 34, "source": "Hugging Face", "score": -0.05186358444959091, "first_commit": "2024-10-22 06:39:57", "latest_commit": "2024-11-19 10:46:49", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Visual Data in NLP", "Dialogue Systems & Conversational Agents" ] }, { "description": "AttaQ-JA Dataset Card AttaQ red teaming dataset was designed to evaluate Large Language Models (LLMs) by assessing their tendency to generate harmful or undesirable responses, which consists of 1402 carefully crafted adversarial questions.", "url": "https://huggingface.co/datasets/ibm-research/AttaQ-JA", "project_name": "AttaQ-JA", "downloads": 34, "source": "Hugging Face", "score": -0.05186358444959091, "first_commit": "2024-09-27 00:04:13", "latest_commit": "2025-03-06 01:26:26", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Responsible & Trustworthy NLP" ] }, { "description": "読み込み方 from datasets import load_dataset dataset = load_dataset(\"YANS-official/senryu-debug\", split=\"test\") 概要 大喜利生成の動作確認用データセットです。", "url": "https://huggingface.co/datasets/YANS-official/senryu-debug", "project_name": "senryu-debug", "downloads": 34, "source": "Hugging Face", "score": -0.05186358444959091, "first_commit": "2024-08-30 05:47:58", "latest_commit": "2024-09-04 10:49:15", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Text Generation", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Introduction Who am I: Qishen Ha", "url": "https://huggingface.co/haqishen/Llama-3-8B-Japanese-Instruct", "project_name": "Llama-3-8B-Japanese-Instruct", "downloads": 33, "source": "Hugging Face", "score": -0.05187068357528542, "first_commit": "2024-04-23 04:41:19", "latest_commit": "2024-05-02 03:36:10", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Dialogue Systems & Conversational Agents", "Language Models", "Semantic Text Processing" ] }, { "description": "nlp-waseda/gpt2-xl-japanese This is Japanese GPT2 with approximately 1.5B parameters pretrained on Japanese Wikipedia and CC-100", "url": 
"https://huggingface.co/nlp-waseda/gpt2-xl-japanese", "project_name": "gpt2-xl-japanese", "downloads": 33, "source": "Hugging Face", "score": -0.05187068357528542, "first_commit": "2022-11-30 04:33:31", "latest_commit": "2023-06-21 04:29:10", "languages": [], "model_or_dataset": "model", "model_size": 1.61, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "bert-japanese-ner このモデルは日本語の固有表現抽出タスクを目的として、京都大学 黒橋・褚・村脇研究室が公開しているBERT日本語Pretrainedモデルをベースにストックマーク株式会社が公開しているner-wikipedia-datasetでファインチューニングしたものです。 ", "url": "https://huggingface.co/ken11/bert-japanese-ner", "project_name": "bert-japanese-ner", "downloads": 33, "source": "Hugging Face", "score": -0.05187068357528542, "first_commit": "2021-11-13 16:28:23", "latest_commit": "2021-11-14 02:34:01", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Named Entity Recognition", "Language Models", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "Oumuamua-7b-instruct-GGUF This is quantized version of nitky/Oumuamua-7b-instruct created using llama.cpp Model Description This is a merge of pre-trained language models created using mergekit. ", "url": "https://huggingface.co/QuantFactory/Oumuamua-7b-instruct-GGUF", "project_name": "Oumuamua-7b-instruct-GGUF", "downloads": 33, "source": "Hugging Face", "score": -0.05187068357528542, "first_commit": "2024-06-19 08:52:12", "latest_commit": "2024-06-19 11:40:58", "languages": [], "model_or_dataset": "model", "model_size": 7.33, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "HODACHI様の Llama-3-EZO-8b-Common-it をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Llama-3-EZO-8b-Common-it-GGUF", "project_name": "Llama-3-EZO-8b-Common-it-GGUF", "downloads": 33, "source": "Hugging Face", "score": -0.05187068357528542, "first_commit": "2024-07-15 11:58:12", "latest_commit": "2024-07-15 20:08:22", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "cosmopedia-japanese-20kのデータに、kunishou様から20k-100kをご提供いただけることになり100kまで拡大しました。 ", "url": "https://huggingface.co/datasets/aixsatoshi/cosmopedia-japanese-100k", "project_name": "cosmopedia-japanese-100k", "downloads": 33, "source": "Hugging Face", "score": -0.05187068357528542, "first_commit": "2024-03-03 16:06:15", "latest_commit": "2024-03-03 16:20:35", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "kajuma/CC-news-2024-July-October-cleanedを元に、9月、10月のニュースのみを抜き出したデータセット。", "url": "https://huggingface.co/datasets/ikedachin/CC-news-2024-October-cleaned-1204", "project_name": "CC-news-2024-October-cleaned-1204", "downloads": 33, "source": "Hugging Face", "score": -0.05187068357528542, "first_commit": "2024-12-04 08:10:52", "latest_commit": "2025-01-27 02:19:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "倫理に関するデータセット 概要 このデータセットは日本語の倫理に関するデータセットです。", "url": 
"https://huggingface.co/datasets/swdq/ethics", "project_name": "ethics", "downloads": 33, "source": "Hugging Face", "score": -0.05187068357528542, "first_commit": "2025-03-25 12:12:25", "latest_commit": "2025-03-25 12:20:59", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "Dataset Information このデータセットは、ThePioneer/japanese-photosの写真をお借りして、", "url": "https://huggingface.co/datasets/Kendamarron/japanese-photo-instruction", "project_name": "japanese-photo-instruction", "downloads": 33, "source": "Hugging Face", "score": -0.05187068357528542, "first_commit": "2024-12-01 13:42:28", "latest_commit": "2024-12-02 14:02:17", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Places in japan.", "url": "https://huggingface.co/datasets/JapanDegitalMaterial/Places_in_Japan", "project_name": "Places_in_Japan", "downloads": 33, "source": "Hugging Face", "score": -0.05187068357528542, "first_commit": "2023-09-23 12:35:06", "latest_commit": "2023-09-23 14:00:16", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "SambaLingo-Japanese-Chat SambaLingo-Japanese-Chat is a human aligned chat model trained in Japanese and English.", "url": "https://huggingface.co/sambanovasystems/SambaLingo-Japanese-Chat", "project_name": "SambaLingo-Japanese-Chat", "downloads": 32, "source": "Hugging Face", "score": -0.05187778270097993, "first_commit": "2024-02-15 22:45:08", "latest_commit": "2024-04-16 22:32:15", "languages": [], "model_or_dataset": "model", "model_size": 6.95, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "日本語モデルカード/Japanese model card 日本語のブログ/Full Japanese dev blog Development source code/開発ソースコード Karasu-DPO-7B", "url": "https://huggingface.co/lightblue/Karasu-DPO-7B", "project_name": "Karasu-DPO-7B", "downloads": 32, "source": "Hugging Face", "score": -0.05187778270097993, "first_commit": "2025-01-27 06:48:02", "latest_commit": "2025-01-27 08:44:52", "languages": [], "model_or_dataset": "model", "model_size": 7.62, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Multilinguality" ] }, { "description": "Yaki-Dofu-Mix 概要 / Overview Yaki-Dofu-Mixは、アニメ風の画風に特化したマージモデルです。 ", "url": "https://huggingface.co/Vsukiyaki/Yaki-Dofu-Mix", "project_name": "Yaki-Dofu-Mix", "downloads": 32, "source": "Hugging Face", "score": -0.05187778270097993, "first_commit": "2023-12-23 09:26:20", "latest_commit": "2023-12-24 11:07:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "[EZO model card]", "url": "https://huggingface.co/HODACHI/EZO-InternVL2-26B", "project_name": "EZO-InternVL2-26B", "downloads": 32, "source": "Hugging Face", "score": -0.05187778270097993, "first_commit": "2024-08-19 08:03:55", "latest_commit": "2024-08-19 10:54:31", "languages": [], "model_or_dataset": "model", "model_size": 25.5, "model_architectures": "InternVLChatModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": 
"CABank Japanese Sakura Corpus Susanne Miyata Department of Medical Sciences Aichi Shukotoku University smiyata@asu.aasa.ac.jp website: https://ca.talkbank.org/access/Sakura.html Important", "url": "https://huggingface.co/datasets/Fhrozen/CABankSakura", "project_name": "CABankSakura", "downloads": 32, "source": "Hugging Face", "score": -0.05187778270097993, "first_commit": "2022-09-14 05:47:24", "latest_commit": "2022-12-03 03:26:50", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "kajuma/CC-news-2024-July-October-cleanedを元に、10月のニュースのみを抜き出したデータセット。", "url": "https://huggingface.co/datasets/ikedachin/CC-news-2024-October-cleaned-sft-1204", "project_name": "CC-news-2024-October-cleaned-sft-1204", "downloads": 32, "source": "Hugging Face", "score": -0.05187778270097993, "first_commit": "2024-12-04 08:10:52", "latest_commit": "2025-01-27 02:19:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "roberta-base-japanese-jsnli This model is a fine-tuned version of nlp-waseda/roberta-base-japanese on the JSNLI dataset.", "url": "https://huggingface.co/Formzu/roberta-base-japanese-jsnli", "project_name": "roberta-base-japanese-jsnli", "downloads": 31, "source": "Hugging Face", "score": -0.051884881826674445, "first_commit": "2022-10-14 07:50:47", "latest_commit": "2022-10-19 11:08:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForSequenceClassification", "multi_labels": [ "Information Extraction & Text Mining", "Responsible & Trustworthy NLP", "Information Retrieval", "Text Classification", "Language Models", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "Model Card for Model ID Fine tunned ASR model from distil-whisper/distil-large-v2.", "url": "https://huggingface.co/spow12/Visual-novel-transcriptor", "project_name": "Visual-novel-transcriptor", "downloads": 31, "source": "Hugging Face", "score": -0.051884881826674445, "first_commit": "2024-04-15 01:43:08", "latest_commit": "2024-08-12 12:39:52", "languages": [], "model_or_dataset": "model", "model_size": 0.756, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [ "Speech Recognition", "Text Generation", "Language Models", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "This model is the fine-tuned version of Helsinki-NLP/opus-mt-ja-en on bsd_ja_en dataset.", "url": "https://huggingface.co/minkhantycc/translation-en-ja", "project_name": "translation-en-ja", "downloads": 31, "source": "Hugging Face", "score": -0.051884881826674445, "first_commit": "2024-02-08 08:08:41", "latest_commit": "2024-03-20 05:41:04", "languages": [], "model_or_dataset": "model", "model_size": 0.07529999999999999, "model_architectures": "MarianMTModel", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "MambaSan-370m 🐍 MambaSan-370m is the first chat Japanese language model based on a state-space model architecture (Mamba).", "url": "https://huggingface.co/loiccabannes/MambaSan-370m", "project_name": "MambaSan-370m", "downloads": 31, "source": 
"Hugging Face", "score": -0.051884881826674445, "first_commit": "2024-02-11 16:52:05", "latest_commit": "2024-02-15 19:57:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "X(Twitter) アカウント ぜひ遊びにきてね。 ", "url": "https://huggingface.co/RikkaBotan/style_bert_vits2_jp_extra_asmr_original", "project_name": "style_bert_vits2_jp_extra_asmr_original", "downloads": 31, "source": "Hugging Face", "score": -0.051884881826674445, "first_commit": "2024-04-28 06:06:03", "latest_commit": "2024-05-06 21:29:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "This dataset was created by automatically translating \"databricks-dolly-15k\" into Japanese.", "url": "https://huggingface.co/datasets/kunishou/databricks-dolly-69k-ja-en-translation", "project_name": "databricks-dolly-69k-ja-en-translation", "downloads": 31, "source": "Hugging Face", "score": -0.051884881826674445, "first_commit": "2023-04-17 18:31:42", "latest_commit": "2023-10-21 15:09:14", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "metamath_ja_950_reka3flash meta-math/MetaMathQAの最初の1000件をRekaAI/reka-flash-3で翻訳した後、フォーマットが維持されなかったものを除去しました。 ", "url": "https://huggingface.co/datasets/kurogane/metamath_ja_950_reka3flash", "project_name": "metamath_ja_950_reka3flash", "downloads": 31, "source": "Hugging Face", "score": -0.051884881826674445, "first_commit": "2025-03-15 07:11:16", "latest_commit": "2025-03-15 10:35:19", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "オリジナルのサイトと同じものを使用しています。 ", "url": "https://huggingface.co/datasets/llm-book/ja-vicuna-qa-benchmark", "project_name": "ja-vicuna-qa-benchmark", "downloads": 31, "source": "Hugging Face", "score": -0.051884881826674445, "first_commit": "2024-06-25 22:14:55", "latest_commit": "2024-08-31 12:37:25", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Question Answering", "Annotation and Dataset Development" ] }, { "description": "Chatbot Arena Conversationsの質問文から、aixsatoshi/Swallow-MX-8x7b-NVE-chatvector-Mixtral-instruct-v2を使用して応答文を作成しました 質問文は、以下のモデルのPrompt部分を使用しました Chatbot Arena Conversations JA (calm2) 以下引用です。 ", "url": "https://huggingface.co/datasets/aixsatoshi/Swallow-MX-chatbot-DPO", "project_name": "Swallow-MX-chatbot-DPO", "downloads": 31, "source": "Hugging Face", "score": -0.051884881826674445, "first_commit": "2024-03-31 06:42:39", "latest_commit": "2024-03-31 08:16:43", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "oasst1-89k-jaをチャット形式に変換したデータセットになります。", "url": "https://huggingface.co/datasets/kunishou/oasst1-chat-44k-ja", "project_name": "oasst1-chat-44k-ja", "downloads": 31, "source": "Hugging Face", "score": -0.051884881826674445, "first_commit": 
"2023-11-12 07:53:04", "latest_commit": "2023-12-25 13:22:22", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Wikipedia日本語版データセット(izumi-lab/wikipedia-ja-20230720)", "url": "https://huggingface.co/datasets/shi3z/Japanese_Wikipedia_Conversation", "project_name": "Japanese_Wikipedia_Conversation", "downloads": 31, "source": "Hugging Face", "score": -0.051884881826674445, "first_commit": "2023-11-10 07:36:40", "latest_commit": "2023-11-10 22:46:29", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "データセット概要 手動で作成したDatabricksに関する質問と回答ペアの日本語データセットです。 ", "url": "https://huggingface.co/datasets/yulanfmy/databricks-qa-ja", "project_name": "databricks-qa-ja", "downloads": 31, "source": "Hugging Face", "score": -0.051884881826674445, "first_commit": "2023-05-15 13:27:23", "latest_commit": "2023-05-15 14:55:06", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Oumuamua-7b-instruct-v2 🚨 If you want to avoid outputs that appear to be literal translations, please prompt this model to role-play as a Japanese person.", "url": "https://huggingface.co/nitky/Oumuamua-7b-instruct-v2", "project_name": "Oumuamua-7b-instruct-v2", "downloads": 30, "source": "Hugging Face", "score": -0.05189198095236895, "first_commit": "2024-06-14 07:08:07", "latest_commit": "2024-06-19 22:29:07", "languages": [], "model_or_dataset": "model", "model_size": 7.33, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Natural Language Interfaces", "Dialogue Response Generation", "Dialogue Systems & Conversational Agents", "Language Models" ] }, { "description": "Whisper Small Ja - Remastered - vlzcrz", "url": "https://huggingface.co/vlzcrz/vlzcrz-whisper-small-japanese-2", "project_name": "vlzcrz-whisper-small-japanese-2", "downloads": 30, "source": "Hugging Face", "score": -0.05189198095236895, "first_commit": "2025-01-21 14:44:51", "latest_commit": "2025-01-21 18:02:34", "languages": [], "model_or_dataset": "model", "model_size": 0.242, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [] }, { "description": "LLaVA-JP Model Card Model detail Model type: LLaVA-JP is a vision-language model that can converse about input images.", "url": "https://huggingface.co/toshi456/llava-jp-1.3b-v1.0", "project_name": "llava-jp-1.3b-v1.0", "downloads": 30, "source": "Hugging Face", "score": -0.05189198095236895, "first_commit": "2023-12-04 13:13:03", "latest_commit": "2023-12-18 10:21:11", "languages": [], "model_or_dataset": "model", "model_size": 1.73, "model_architectures": "LlavaGpt2ForCausalLM", "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "This is a Japanese sentence-LUKE model.", "url": "https://huggingface.co/cheonboy/sentence_embedding_japanese", "project_name": "sentence_embedding_japanese", "downloads": 30, "source": "Hugging Face", "score": -0.05189198095236895, "first_commit": "2023-10-05 05:10:25", "latest_commit": "2023-10-05 05:13:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeModel", "multi_labels": [ "Syntactic Text 
Processing", "Language Models" ] }, { "description": "electra-base-japanese-discriminator (sudachitra-wordpiece, mC4 Japanese) -", "url": "https://huggingface.co/megagonlabs/electra-base-japanese-discriminator", "project_name": "electra-base-japanese-discriminator", "downloads": 30, "source": "Hugging Face", "score": -0.05189198095236895, "first_commit": "2022-06-03 06:49:32", "latest_commit": "2022-06-03 07:25:56", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForPreTraining", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models" ] }, { "description": "Sarashina2-7B Instruct sarashina2-7Bを会話できるようにフルファインチューニングしたものです。", "url": "https://huggingface.co/alfredplpl/sarashina2-7b-it", "project_name": "sarashina2-7b-it", "downloads": 30, "source": "Hugging Face", "score": -0.05189198095236895, "first_commit": "2024-06-12 02:24:28", "latest_commit": "2024-06-12 03:00:35", "languages": [], "model_or_dataset": "model", "model_size": 7.32, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Ninja-v1-RP-expressive-v2 GGUF版はこちら/Click here for the GGUF version 概要 This is a merge of pre-trained language models created using mergekit. ", "url": "https://huggingface.co/Aratako/Ninja-v1-RP-expressive-v2", "project_name": "Ninja-v1-RP-expressive-v2", "downloads": 30, "source": "Hugging Face", "score": -0.05189198095236895, "first_commit": "2024-05-25 16:56:18", "latest_commit": "2024-05-26 15:20:52", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Derived from 青空文庫及びサピエの点字データから作成した振り仮名のデータセット(GitHub) https://github.com/ndl-lab/huriganacorpus-aozora Certain mismatches in the original corpus were eliminated during validation (307 instances) Error: 烈しい調子である。 ", "url": "https://huggingface.co/datasets/Calvin-Xu/Furigana-Aozora", "project_name": "Furigana-Aozora", "downloads": 30, "source": "Hugging Face", "score": -0.05189198095236895, "first_commit": "2024-07-28 02:29:57", "latest_commit": "2024-07-28 09:09:55", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "📘📕 SimpleStories 📙📗 このデータセットは、gpt-4o-miniによって生成された短編小説で出来ているデータセットです。", "url": "https://huggingface.co/datasets/lennart-finke/SimpleStories-JA", "project_name": "SimpleStories-JA", "downloads": 30, "source": "Hugging Face", "score": -0.05189198095236895, "first_commit": "2025-02-09 15:58:43", "latest_commit": "2025-02-13 17:48:16", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "novecomi-novel-metadata https://dengekibunko.jp/novecomi/novel/ からスクレイピング。 ", "url": "https://huggingface.co/datasets/p1atdev/novecomi-novel-metadata", "project_name": "novecomi-novel-metadata", "downloads": 30, "source": "Hugging Face", "score": -0.05189198095236895, "first_commit": "2023-12-16 08:48:49", "latest_commit": "2023-12-16 08:57:17", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": 
"Hibiki ASR Phonemizer This model is a Phoneme Level Speech Recognition network, originally a fine-tuned version of openai/whisper-large-v3 on a mixture of Different Japanese datasets.", "url": "https://huggingface.co/Respair/Hibiki_ASR_Phonemizer_v0.2", "project_name": "Hibiki_ASR_Phonemizer_v0.2", "downloads": 29, "source": "Hugging Face", "score": -0.051899080078063466, "first_commit": "2024-08-12 01:30:08", "latest_commit": "2024-08-19 18:13:01", "languages": [], "model_or_dataset": "model", "model_size": 1.54, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [ "Speech Recognition", "Text Generation", "Speech & Audio in NLP", "Multimodality" ] }, { "description": "This model is for transcribing audio into Hiragana, one format of Japanese language.", "url": "https://huggingface.co/vitouphy/wav2vec2-xls-r-300m-japanese", "project_name": "wav2vec2-xls-r-300m-japanese", "downloads": 29, "source": "Hugging Face", "score": -0.051899080078063466, "first_commit": "2022-01-31 17:27:45", "latest_commit": "2022-03-23 18:30:07", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ "Speech & Audio in NLP", "Multimodality" ] }, { "description": "llm-book/bert-base-japanese-v3-crf-ner-wikipedia-dataset 「大規模言語モデル入門」の第6章で紹介している固有表現認識のモデルです。 ", "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-crf-ner-wikipedia-dataset", "project_name": "bert-base-japanese-v3-crf-ner-wikipedia-dataset", "downloads": 29, "source": "Hugging Face", "score": -0.051899080078063466, "first_commit": "2023-05-28 08:19:43", "latest_commit": "2023-07-25 15:04:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertWithCrfForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Named Entity Recognition", "Language Models", "Semantic Text Processing" ] }, { "description": "BERT base Japanese - JaQuAD Description A Japanese Question Answering model fine-tuned on JaQuAD.", "url": "https://huggingface.co/SkelterLabsInc/bert-base-japanese-jaquad", "project_name": "bert-base-japanese-jaquad", "downloads": 29, "source": "Hugging Face", "score": -0.051899080078063466, "first_commit": "2022-01-27 08:08:53", "latest_commit": "2022-02-04 02:39:25", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForQuestionAnswering", "multi_labels": [ "Natural Language Interfaces", "Representation Learning", "Question Answering", "Language Models", "Semantic Text Processing" ] }, { "description": "Model Details Model Description This repository provides Asagi-2B, a large-scale Japanese Vision & Language Model (VLM).", "url": "https://huggingface.co/MIL-UT/Asagi-2B", "project_name": "Asagi-2B", "downloads": 29, "source": "Hugging Face", "score": -0.051899080078063466, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 2.31, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "Dolly 日本語翻訳版 このリポジトリは、Databricksが開発したdollyプロジェクトの日本語翻訳版です。 ", "url": "https://huggingface.co/datasets/takosama/databricks-dolly-15k-ja-google-trans", "project_name": "databricks-dolly-15k-ja-google-trans", "downloads": 29, "source": "Hugging Face", "score": -0.051899080078063466, "first_commit": "2023-04-13 
15:38:17", "latest_commit": "2023-04-13 17:18:21", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "英語Wikipedia記事の冒頭複数文を抽出し、人手で日本語翻訳した文章レベル対訳データセットです。 ", "url": "https://huggingface.co/datasets/hpprc/honyaku", "project_name": "honyaku", "downloads": 29, "source": "Hugging Face", "score": -0.051899080078063466, "first_commit": "2024-11-20 04:42:28", "latest_commit": "2024-11-20 09:06:40", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "For more information, see website below!", "url": "https://huggingface.co/datasets/Hoshikuzu/JParaCrawl", "project_name": "JParaCrawl", "downloads": 29, "source": "Hugging Face", "score": -0.051899080078063466, "first_commit": "2024-08-24 15:07:12", "latest_commit": "2024-08-25 13:15:52", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "nlp-waseda/roberta-large-japanese-seq512 Model description This is a Japanese RoBERTa large model pretrained on Japanese Wikipedia and the Japanese portion of CC-100 with the maximum sequence length of 512.", "url": "https://huggingface.co/nlp-waseda/roberta-large-japanese-seq512", "project_name": "roberta-large-japanese-seq512", "downloads": 28, "source": "Hugging Face", "score": -0.05190617920375797, "first_commit": "2022-06-13 09:46:45", "latest_commit": "2022-10-21 14:49:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "roberta-small-hi-char Model Description", "url": "https://huggingface.co/nakamura196/roberta-small-hi-char", "project_name": "roberta-small-hi-char", "downloads": 28, "source": "Hugging Face", "score": -0.05190617920375797, "first_commit": "2022-07-11 06:35:00", "latest_commit": "2022-07-15 05:32:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "AXCXEPT/phi-4-open-R1-Distill-EZOv1 Model Details This model is a Reasoner version of the phi-4 model by employing open-r1, which mimics the Distill methodology of Deepseek-R1.", "url": "https://huggingface.co/AXCXEPT/phi-4-open-R1-Distill-EZOv1", "project_name": "phi-4-open-R1-Distill-EZOv1", "downloads": 28, "source": "Hugging Face", "score": -0.05190617920375797, "first_commit": "2025-01-26 12:14:18", "latest_commit": "2025-01-27 02:22:50", "languages": [], "model_or_dataset": "model", "model_size": 14.7, "model_architectures": "Phi3ForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/OrionStarAI/Orion-14B-Chat-Plugin", "project_name": "Orion-14B-Chat-Plugin", "downloads": 28, "source": "Hugging Face", "score": -0.05190617920375797, "first_commit": "2024-01-16 12:19:45", "latest_commit": "2024-03-26 10:12:37", "languages": [], 
"model_or_dataset": "model", "model_size": null, "model_architectures": "OrionForCausalLM", "multi_labels": [ "Multilinguality", "Language Models" ] }, { "description": "Dataset Details Dataset Type:Japanese LLaVA Instruct 150K is a localized version of the original LLaVA Visual Instruct 150K dataset.", "url": "https://huggingface.co/datasets/turing-motors/LLaVA-Instruct-150K-JA", "project_name": "LLaVA-Instruct-150K-JA", "downloads": 28, "source": "Hugging Face", "score": -0.05190617920375797, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Asian Language Treebank (ALT) Project ALT Parallel Corpusのうち、日英対訳部分のみを抽出したデータセットです。", "url": "https://huggingface.co/datasets/hpprc/alt-parallel-en-ja", "project_name": "alt-parallel-en-ja", "downloads": 28, "source": "Hugging Face", "score": -0.05190617920375797, "first_commit": "2024-03-21 02:24:27", "latest_commit": "2024-03-21 12:40:15", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Syntactic Text Processing", "Syntactic Parsing", "Annotation and Dataset Development" ] }, { "description": "Multilingual Image Translation Dataset: OPUS-MIT-5M", "url": "https://huggingface.co/datasets/liboaccn/OPUS-MIT-5M", "project_name": "OPUS-MIT-5M", "downloads": 28, "source": "Hugging Face", "score": -0.05190617920375797, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Visual Data in NLP", "Text Generation", "Machine Translation", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "(English part follows Japanese one.", "url": "https://huggingface.co/tohoku-nlp/stable-diffusion-xl-jp-base-1.0", "project_name": "stable-diffusion-xl-jp-base-1.0", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": "2023-11-06 05:02:27", "latest_commit": "2023-11-06 05:37:01", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Fugaku-LLM利用規約 この利用規約(以下「本規約」といいます)は、富士通株式会社、国立研究開発法人理化学研究所、国立大学法人東京工業大学、国立大学法人東北大学、株式会社サイバーエージェント、国立大学法人東海国立大学機構、及び株式会社Kotoba Technologies Japan (以下「開発者」といいます)による、スーパーコンピュータ「富岳」政策対応枠における大規模言語モデル分散並列学習手法の開発の成果物として公開する大規模言語モデル(以下「Fugaku-LLM」といいます)の利用に関する条件を定めるものです。", "url": "https://huggingface.co/Fugaku-LLM/Fugaku-LLM-13B", "project_name": "Fugaku-LLM-13B", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 13.3, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "J-Moshi: A Japanese Full-duplex Spoken Dialogue System J-Moshiは,日本語におけるfull-duplex音声対話システムです.", "url": "https://huggingface.co/nu-dialogue/j-moshi", "project_name": "j-moshi", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": "2025-01-21 15:23:45", "latest_commit": "2025-02-15 03:03:53", "languages": [], "model_or_dataset": 
"model", "model_size": 7.69, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "merge This is a merge of pre-trained language models created using mergekit.", "url": "https://huggingface.co/yamatazen/Shisa-v2-Mistral-Nemo-12B-Abliterated", "project_name": "Shisa-v2-Mistral-Nemo-12B-Abliterated", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": "2025-04-21 06:56:06", "latest_commit": "2025-04-21 07:03:51", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "One more step before getting this model.", "url": "https://huggingface.co/rinna/japanese-stable-diffusion", "project_name": "japanese-stable-diffusion", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Model Card for Model ID このモデルはrinna/japanese-gpt-1bをベースモデルとして、 コンテキストからの抽出型QAと、解答を新たなコンテキストでリファインするための学習を行ったモデルです。 ", "url": "https://huggingface.co/oshizo/qa-refine-japanese-gpt-1b", "project_name": "qa-refine-japanese-gpt-1b", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": "2023-01-18 15:43:39", "latest_commit": "2023-01-19 10:14:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "Model Details Model Description This repository provides Asagi-4B, a large-scale Japanese Vision & Language Model (VLM).", "url": "https://huggingface.co/MIL-UT/Asagi-4B", "project_name": "Asagi-4B", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 4.24, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "Model Card for Wabisabi-v1.0 The Mistral-7B--based Large Language Model (LLM) is an noveldataset fine-tuned version of the Mistral-7B-v0.1 wabisabi has the following changes compared to Mistral-7B-v0.1.", "url": "https://huggingface.co/Local-Novel-LLM-project/WabiSabi-V1", "project_name": "WabiSabi-V1", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": "2024-09-30 00:30:36", "latest_commit": "2024-12-03 03:35:43", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Text Generation", "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Tanuki-ZeRo-gguf kanhatakeyamaさんが公開しているTanuki-ZeRoのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Tanuki-ZeRo-gguf", "project_name": "Tanuki-ZeRo-gguf", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": "2024-03-30 10:49:02", "latest_commit": "2024-03-30 17:01:16", "languages": [], "model_or_dataset": "model", "model_size": 
13.1, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "kurogane/Llama3-BioYouri-8B-mergetest このモデルは生物学・医学に精通したOpenBioLLM-8Bをベースに、日本語対応を向上させるためにLlama-3-youko-8b-instruct-chatvectorとマージさせたモデルです。 ", "url": "https://huggingface.co/kurogane/Llama3-BioYouri-8B-instruct-chatvector-mergetest", "project_name": "Llama3-BioYouri-8B-instruct-chatvector-mergetest", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": "2024-05-10 08:34:11", "latest_commit": "2024-05-21 12:53:33", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "upskyy/gte-korean-base This model is korsts and kornli finetuning model from Alibaba-NLP/gte-multilingual-base.", "url": "https://huggingface.co/upskyy/gte-base-korean", "project_name": "gte-base-korean", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": "2024-08-08 14:34:44", "latest_commit": "2024-08-08 15:29:27", "languages": [], "model_or_dataset": "model", "model_size": 0.305, "model_architectures": "NewModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "bert-base-japanese-v3-jsts 「大規模言語モデル入門」の第5章で紹介している(意味類似度計算)のモデルです。 ", "url": "https://huggingface.co/masato12/bert-base-japanese-v3-jsts-with-tokenizer", "project_name": "bert-base-japanese-v3-jsts-with-tokenizer", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": "2024-07-21 04:58:46", "latest_commit": "2024-07-21 18:21:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "埋め込みモデルの学習、評価のためのクラスタリングデータセットです。 ", "url": "https://huggingface.co/datasets/oshizo/LawClustering-ja", "project_name": "LawClustering-ja", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": "2024-06-23 05:25:56", "latest_commit": "2024-06-23 15:35:15", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "llm-jp-corpus-v3のwarp_htmlのうちlevel2フィルタリングされたデータをHFフォーマットに変換し、各データに付与されたURLから元記事のタイトルを取得可能なものについては取得して付与したデータセットです。 ", "url": "https://huggingface.co/datasets/hpprc/llmjp-warp-html", "project_name": "llmjp-warp-html", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": "2024-09-18 02:41:06", "latest_commit": "2024-09-20 10:33:29", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "A Japanese dataset generated with Qwen/Qwen1.5-14B model.", "url": "https://huggingface.co/datasets/iam-ajaymeena/Self-Instruct-Japanese-Qwen1.5-14B", "project_name": "Self-Instruct-Japanese-Qwen1.5-14B", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": "2024-06-16 06:04:11", "latest_commit": "2024-06-16 09:12:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Generation" ] }, { "description": "A Japanese 
dataset generated with an opensource elyza/ELYZA-japanese-Llama-2-13b-instruct model.", "url": "https://huggingface.co/datasets/iam-ajaymeena/Self-Instruct-Japanese-Elzya-13B", "project_name": "Self-Instruct-Japanese-Elzya-13B", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": "2024-06-15 08:11:55", "latest_commit": "2024-06-16 09:12:56", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Generation", "Language Models" ] }, { "description": "Scenery of japan.", "url": "https://huggingface.co/datasets/JapanDegitalMaterial/Scenery_of_japan", "project_name": "Scenery_of_japan", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": "2023-09-23 11:08:44", "latest_commit": "2023-09-23 14:32:48", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "#Origin The name comes from \"hachiwari/はちわれ\" (chiikawa/ちいかわ).", "url": "https://huggingface.co/datasets/Sakalti/hachiwari", "project_name": "hachiwari", "downloads": 27, "source": "Hugging Face", "score": -0.051913278329452486, "first_commit": "2024-10-05 01:31:37", "latest_commit": "2024-12-15 04:40:15", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "BERT large Japanese (character-level tokenization with whole word masking, jawiki-20200831)", "url": "https://huggingface.co/tohoku-nlp/bert-large-japanese-char", "project_name": "bert-large-japanese-char", "downloads": 26, "source": "Hugging Face", "score": -0.051920377455147, "first_commit": "2021-03-05 06:36:24", "latest_commit": "2021-09-23 15:45:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "roberta-small-japanese-aozora-char Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-small-japanese-aozora-char", "project_name": "roberta-small-japanese-aozora-char", "downloads": 26, "source": "Hugging Face", "score": -0.051920377455147, "first_commit": "2021-12-23 02:38:26", "latest_commit": "2021-12-23 11:55:42", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "Model trained on 800,000 Japanese sentences after reducing oshizo/japanese-e5-mistral-7b_slerp to 8 layers.", "url": "https://huggingface.co/oshizo/japanese-e5-mistral-1.9b", "project_name": "japanese-e5-mistral-1.9b", "downloads": 26, "source": "Hugging Face", "score": -0.051920377455147, "first_commit": "2024-02-02 12:39:11", "latest_commit": "2024-02-03 00:28:28", "languages": [], "model_or_dataset": "model", "model_size": 1.88, "model_architectures": "MistralForEmbedding", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Overview of bert-japanese-12M The bert-japanese-12M model is a transformer-based model with BERT 
architecture, which is designed to be used on Japanese text.", "url": "https://huggingface.co/nptdat/bert-japanese-12M", "project_name": "bert-japanese-12M", "downloads": 26, "source": "Hugging Face", "score": -0.051920377455147, "first_commit": "2024-08-16 16:46:49", "latest_commit": "2024-08-19 02:56:14", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese-LLaMA-3-8B-Instruct-v2-GGUF Japanese-LLaMA-3-8B-Instruct-v2-GGUFはJapanese-LLaMA-3-8B-Instruct-v2のGGUF形式です。 ", "url": "https://huggingface.co/owner203/japanese-llama-3-8b-instruct-v2-gguf", "project_name": "japanese-llama-3-8b-instruct-v2-gguf", "downloads": 26, "source": "Hugging Face", "score": -0.051920377455147, "first_commit": "2024-06-10 11:21:01", "latest_commit": "2024-06-21 06:35:03", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "概要 このデータセットはOpen_o1_sft_ProデータセットをQwen社のQwen2.5-14B-Instructを用いて日本語に翻訳したものになります。 ", "url": "https://huggingface.co/datasets/blastai/Open_o1_sft_Pro_translated_jp", "project_name": "Open_o1_sft_Pro_translated_jp", "downloads": 26, "source": "Hugging Face", "score": -0.051920377455147, "first_commit": "2024-12-29 16:29:37", "latest_commit": "2024-12-30 18:43:22", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Generation" ] }, { "description": "データセットについて このデータセットは、 HuggingFaceTB/everyday-conversations-llama3.1-2k を機械翻訳で日本語化したものになります。 ", "url": "https://huggingface.co/datasets/U23-lab/everyday_conversations_ja", "project_name": "everyday_conversations_ja", "downloads": 26, "source": "Hugging Face", "score": -0.051920377455147, "first_commit": "2024-12-28 12:06:30", "latest_commit": "2024-12-28 12:26:01", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "Abstruct This is a multi-turn conversation dataset generated from the Japanese Wikipedia dataset using Orion14B-Chat.", "url": "https://huggingface.co/datasets/shi3z/ja_conv_wikipedia_orion14B_100K", "project_name": "ja_conv_wikipedia_orion14B_100K", "downloads": 26, "source": "Hugging Face", "score": -0.051920377455147, "first_commit": "2024-01-30 01:45:05", "latest_commit": "2024-01-30 02:13:12", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "自動生成の対話データ 青空文庫からランダムに抜粋したテキストをもとに、Calm3-22B-chatで自動生成のマルチンターンデータを生成しました。 生成コード 吾輩は猫である 限定ver 軽いクリーニング", "url": "https://huggingface.co/datasets/kanhatakeyama/multiturn-conv-from-aozora-bunko", "project_name": "multiturn-conv-from-aozora-bunko", "downloads": 26, "source": "Hugging Face", "score": -0.051920377455147, "first_commit": "2024-09-21 06:29:26", "latest_commit": "2024-09-21 06:48:07", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "JGLUE[JNLI]: Japanese General Language Understanding Evaluation JNLI(yahoojapan/JGLUE)", 
"url": "https://huggingface.co/datasets/zenless-lab/jnli", "project_name": "jnli", "downloads": 26, "source": "Hugging Face", "score": -0.051920377455147, "first_commit": "2024-12-18 10:09:43", "latest_commit": "2024-12-18 14:40:06", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Reasoning", "Textual Inference", "Language Models", "Semantic Text Processing" ] }, { "description": "Derived from 青空文庫及びサピエの音声デイジーデータから作成した振り仮名注釈付き音声コーパスのデータセット https://github.com/ndl-lab/hurigana-speech-corpus-aozora All text files in the original data were processed for 3361443 entries; duplicates and entries with no kanji were dropped post cleanup", "url": "https://huggingface.co/datasets/Calvin-Xu/Furigana-Aozora-Speech", "project_name": "Furigana-Aozora-Speech", "downloads": 26, "source": "Hugging Face", "score": -0.051920377455147, "first_commit": "2024-07-29 04:19:30", "latest_commit": "2024-07-30 08:25:50", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multimodality", "Annotation and Dataset Development" ] }, { "description": "It contains Japanese instruction-like data intended for LLM construction/tuning.", "url": "https://huggingface.co/datasets/tellarin-ai/llm-japanese-dataset-vanilla-aya-format", "project_name": "llm-japanese-dataset-vanilla-aya-format", "downloads": 26, "source": "Hugging Face", "score": -0.051920377455147, "first_commit": "2023-12-15 13:03:17", "latest_commit": "2024-01-31 14:27:56", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Derived from 全国書誌データから作成した振り仮名のデータセット(GitHub)", "url": "https://huggingface.co/datasets/Calvin-Xu/Furigana-NDLBIB", "project_name": "Furigana-NDLBIB", "downloads": 26, "source": "Hugging Face", "score": -0.051920377455147, "first_commit": "2024-07-28 08:37:17", "latest_commit": "2024-07-28 09:08:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Annotation and Dataset Development" ] }, { "description": "日本語T5 Prefix Language Model", "url": "https://huggingface.co/sonoisa/t5-base-japanese-adapt", "project_name": "t5-base-japanese-adapt", "downloads": 25, "source": "Hugging Face", "score": -0.05192747658084151, "first_commit": "2022-08-27 08:51:11", "latest_commit": "2022-11-05 09:34:10", "languages": [], "model_or_dataset": "model", "model_size": 0.248, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "whisper-large-v2-jp model for CTranslate2 This repository contains the conversion of vumichien/whisper-large-v2-jp to the CTranslate2 model format.", "url": "https://huggingface.co/arc-r/faster-whisper-large-v2-jp", "project_name": "faster-whisper-large-v2-jp", "downloads": 25, "source": "Hugging Face", "score": -0.05192747658084151, "first_commit": "2023-07-07 06:16:06", "latest_commit": "2023-07-07 18:09:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "モデル概要 このモデルは、 sonoisa/sentence-luke-japanese-base-lite をSNS上のコメントに人手で攻撃性評価を行ったデータセットでFine-tuningすることで作成しました。 ", "url": 
"https://huggingface.co/TomokiFujihara/luke-japanese-base-lite-offensiveness-estimation", "project_name": "luke-japanese-base-lite-offensiveness-estimation", "downloads": 25, "source": "Hugging Face", "score": -0.05192747658084151, "first_commit": "2023-12-08 03:20:14", "latest_commit": "2024-03-24 12:35:36", "languages": [], "model_or_dataset": "model", "model_size": 0.133, "model_architectures": "OffensivenessEstimationModel", "multi_labels": [ "Language Models" ] }, { "description": "zenz-v2.5-small zenz-v2.5はかな漢字変換タスクに特化したGPT-2アーキテクチャの条件付き言語モデルです。", "url": "https://huggingface.co/Miwa-Keita/zenz-v2.5-medium", "project_name": "zenz-v2.5-medium", "downloads": 25, "source": "Hugging Face", "score": -0.05192747658084151, "first_commit": "2025-01-13 07:51:52", "latest_commit": "2025-01-13 16:13:46", "languages": [], "model_or_dataset": "model", "model_size": 0.31, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "BERT for Japanese Twitter", "url": "https://huggingface.co/LoneWolfgang/bert-for-japanese-twitter", "project_name": "bert-for-japanese-twitter", "downloads": 25, "source": "Hugging Face", "score": -0.05192747658084151, "first_commit": "2024-05-06 15:53:21", "latest_commit": "2024-08-09 12:24:35", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Language Models", "Emotion Analysis", "Semantic Text Processing", "Sentiment Analysis" ] }, { "description": "bart-base-japanese-news(base-sized model)", "url": "https://huggingface.co/stockmark/bart-base-japanese-news", "project_name": "bart-base-japanese-news", "downloads": 25, "source": "Hugging Face", "score": -0.05192747658084151, "first_commit": "2023-01-20 04:23:07", "latest_commit": "2023-12-08 03:39:50", "languages": [], "model_or_dataset": "model", "model_size": 0.125, "model_architectures": "BartForConditionalGeneration", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Kuzushiji 49 MNIST FCN Model Overview This repository contains a Fully Convolutional Neural Network (FCN) model for the Kuzushiji 49 MNIST dataset.", "url": "https://huggingface.co/IoriU/kmnist49-classifier", "project_name": "kmnist49-classifier", "downloads": 25, "source": "Hugging Face", "score": -0.05192747658084151, "first_commit": "2024-02-27 14:24:34", "latest_commit": "2024-02-27 14:53:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Visual Data in NLP", "Information Retrieval", "Text Classification" ] }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-gamma-7B-GPTQ", "project_name": "japanese-stablelm-instruct-gamma-7B-GPTQ", "downloads": 25, "source": "Hugging Face", "score": -0.05192747658084151, "first_commit": "2023-10-28 19:03:17", "latest_commit": "2023-10-28 20:24:40", "languages": [], "model_or_dataset": "model", "model_size": 1.2, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "Vecteus-V2-7B このモデルは、ベクトルマージなどを用い作成された高性能ベースモデルです。 ", "url": "https://huggingface.co/Local-Novel-LLM-project/Vecteus-V2-7B", "project_name": "Vecteus-V2-7B", "downloads": 25, "source": "Hugging Face", 
"score": -0.05192747658084151, "first_commit": "2024-06-16 03:51:43", "latest_commit": "2024-07-06 13:39:41", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Llama-3-Umievo-itr014-Shizuko-8b このモデルは日本語に対応しているLlama-3ベースの4つのモデルを進化的アルゴリズムで進化的マージしたものです。", "url": "https://huggingface.co/umiyuki/Llama-3-Umievo-itr014-Shizuko-8b", "project_name": "Llama-3-Umievo-itr014-Shizuko-8b", "downloads": 25, "source": "Hugging Face", "score": -0.05192747658084151, "first_commit": "2024-06-08 05:25:05", "latest_commit": "2024-06-08 07:47:59", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "HuggingFaceFW/fineweb-edu-classifierを再現するために、日本語データでtohoku-nlp/bert-base-japanese-v3を学習したモデルです。 ", "url": "https://huggingface.co/Kendamarron/fineweb-edu-classifier-ja-v2", "project_name": "fineweb-edu-classifier-ja-v2", "downloads": 25, "source": "Hugging Face", "score": -0.05192747658084151, "first_commit": "2024-06-14 13:20:22", "latest_commit": "2024-06-14 13:28:08", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Dataset Description This is the Japanese Translation version of sciq.", "url": "https://huggingface.co/datasets/izumi-lab/sciq-ja-mbartm2m", "project_name": "sciq-ja-mbartm2m", "downloads": 25, "source": "Hugging Face", "score": -0.05192747658084151, "first_commit": "2023-05-19 02:03:47", "latest_commit": "2023-05-19 03:54:18", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "概要 このデータセットはsakura_japanese_datasetの質問に回答する形式で作られた、一問一答形式の合成データセットです。 ", "url": "https://huggingface.co/datasets/Nurture-intelligence/ins_dataset", "project_name": "ins_dataset", "downloads": 25, "source": "Hugging Face", "score": -0.05192747658084151, "first_commit": "2024-12-15 09:39:47", "latest_commit": "2024-12-17 16:27:37", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "A JSON based anime dataset containing the most important meta data as well as cross references to various anime sites such as MAL, ANIDB, ANILIST, KITSU and more...", "url": "https://huggingface.co/datasets/labofsahil/animelist-dataset", "project_name": "animelist-dataset", "downloads": 25, "source": "Hugging Face", "score": -0.05192747658084151, "first_commit": "2024-11-16 05:37:06", "latest_commit": "2024-12-25 22:00:46", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "sarashina2.2-3b-instruct-v0.1-GGUF base_model: sbintuitions/sarashina2.2-3b-instruct-v0.1 imatrix: TFMC/imatrix-dataset-for-japanese-llm", "url": "https://huggingface.co/yasu-oh/sarashina2.2-3b-instruct-v0.1-GGUF", "project_name": "sarashina2.2-3b-instruct-v0.1-GGUF", "downloads": 24, "source": 
"Hugging Face", "score": -0.05193457570653602, "first_commit": "2025-03-05 04:07:04", "latest_commit": "2025-03-05 13:15:09", "languages": [], "model_or_dataset": "model", "model_size": 3.36, "model_architectures": null, "multi_labels": [ "Language Models", "Explainability & Interpretability in NLP" ] }, { "description": "This model is a fine-tuned version of facebook/wav2vec2-xls-r-300m on the MOZILLA-FOUNDATION/COMMON_VOICE_8_0 - JA dataset.", "url": "https://huggingface.co/AndrewMcDowell/wav2vec2-xls-r-300m-japanese", "project_name": "wav2vec2-xls-r-300m-japanese", "downloads": 24, "source": "Hugging Face", "score": -0.05193457570653602, "first_commit": "2022-01-26 15:43:02", "latest_commit": "2022-03-23 18:34:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ "Representation Learning", "Language Models", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "bert-base-japanese-v3-bpr-passage-aio 「大規模言語モデル入門」の第9章で紹介している文書検索モデルBPRのパッセージエンコーダです。 ", "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-bpr-passage-aio", "project_name": "bert-base-japanese-v3-bpr-passage-aio", "downloads": 24, "source": "Hugging Face", "score": -0.05193457570653602, "first_commit": "2023-06-06 08:22:28", "latest_commit": "2023-07-24 07:14:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertModel", "multi_labels": [ "Information Retrieval", "Language Models", "Semantic Text Processing" ] }, { "description": "This is a model for named entity recognition of Japanese medical documents.", "url": "https://huggingface.co/Tomohiro/RealMedNLP_CR_JA", "project_name": "RealMedNLP_CR_JA", "downloads": 24, "source": "Hugging Face", "score": -0.05193457570653602, "first_commit": "2022-08-08 08:55:23", "latest_commit": "2022-08-13 03:06:31", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Named Entity Recognition", "Tagging" ] }, { "description": "swallow-hermes-st-v1 物語作成に強めなモデルが出来ないかと考えて作ったモデルです。", "url": "https://huggingface.co/napopoa32/swallow-hermes-st-v1", "project_name": "swallow-hermes-st-v1", "downloads": 24, "source": "Hugging Face", "score": -0.05193457570653602, "first_commit": "2024-03-24 06:19:48", "latest_commit": "2024-03-26 12:36:41", "languages": [], "model_or_dataset": "model", "model_size": 7.33, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "「LLM-jp-3 172B beta1」利用規約 この利用規約(以下「本規約」といいます)は、大学共同利用機関法人 情報・システム研究機構 国立情報学研究所(以下「提供者」といいます)による開発の成果物として公開する大規模言語モデル「LLM-jp-3 172B beta1」(以下「本プログラム」といいます)の利用に関する条件を定めるものです。", "url": "https://huggingface.co/llm-jp/llm-jp-3-172b-beta1", "project_name": "llm-jp-3-172b-beta1", "downloads": 24, "source": "Hugging Face", "score": -0.05193457570653602, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 172.0, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ESPnet2 TTS pretrained model kan-bayashi/tsukuyomi_tts_finetune_full_band_jsut_vits_raw_phn_jaconv_pyopenjtalk_prosody_latest ♻", "url": 
"https://huggingface.co/espnet/kan-bayashi_tsukuyomi_tts_finetune_full_band_jsut_vits_raw_phn_jaconv_pyopenjtalk_prosody_latest", "project_name": "kan-bayashi_tsukuyomi_tts_finetune_full_band_jsut_vits_raw_phn_jaconv_pyopenjtalk_prosody_latest", "downloads": 24, "source": "Hugging Face", "score": -0.05193457570653602, "first_commit": "2021-10-23 20:50:15", "latest_commit": "2021-10-23 16:50:21", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Miwa-Keita/zenz-v1-checkpoints を optimum 用に ONNX に変換したモデルです。", "url": "https://huggingface.co/p1atdev/zenz-v1-onnx", "project_name": "zenz-v1-onnx", "downloads": 24, "source": "Hugging Face", "score": -0.05193457570653602, "first_commit": "2024-06-29 03:03:03", "latest_commit": "2024-06-29 03:40:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "transformer-lm-japanese-1.0b This is a JAX/Flax-based transformer language model trained on a Japanese dataset.", "url": "https://huggingface.co/fukugawa/transformer-lm-japanese-1.0b", "project_name": "transformer-lm-japanese-1.0b", "downloads": 24, "source": "Hugging Face", "score": -0.05193457570653602, "first_commit": "2024-07-25 04:27:53", "latest_commit": "2024-09-06 12:44:00", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "TransformerLMForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Tanuki-8B-dpo-v1.0-4k-AWQ 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8B-dpo-v1.0-4kのAWQ 4bit量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8B-dpo-v1.0-4k-AWQ", "project_name": "Tanuki-8B-dpo-v1.0-4k-AWQ", "downloads": 24, "source": "Hugging Face", "score": -0.05193457570653602, "first_commit": "2024-08-27 05:48:42", "latest_commit": "2024-09-03 09:28:33", "languages": [], "model_or_dataset": "model", "model_size": 1.47, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "this is forkhttps://huggingface.co/datasets/globis-university/aozorabunko-clean filtered row[\"meta\"][\"文字遣い種別\"] == \"新字新仮名\"", "url": "https://huggingface.co/datasets/if001/aozorabunko-clean-sin", "project_name": "aozorabunko-clean-sin", "downloads": 24, "source": "Hugging Face", "score": -0.05193457570653602, "first_commit": "2023-09-04 04:22:55", "latest_commit": "2024-10-14 12:46:29", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "Reasoning、知識、会話の掛け合いなどの情報密度が高いマルチターンの会話データです。 ", "url": "https://huggingface.co/datasets/aixsatoshi/Chat-with-cosmopedia", "project_name": "Chat-with-cosmopedia", "downloads": 24, "source": "Hugging Face", "score": -0.05193457570653602, "first_commit": "2024-03-24 07:45:03", "latest_commit": "2024-03-28 14:36:58", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Reasoning" ] }, { "description": "NVIDIA が公開している SteerLM 向けのトライアルデータセット HelpSteer2を日本語に自動翻訳したデータセットになります。", "url": "https://huggingface.co/datasets/kunishou/HelpSteer2-20k-ja", "project_name": "HelpSteer2-20k-ja", "downloads": 
24, "source": "Hugging Face", "score": -0.05193457570653602, "first_commit": "2024-06-21 08:09:33", "latest_commit": "2024-06-21 08:44:21", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "合成日本語指示データセット 概要 このデータセットは、大規模言語モデル(Qwen2.5-32B-instruct)", "url": "https://huggingface.co/datasets/DeL-TaiseiOzaki/Tengentoppa-sft-qwen2.5-32b-reasoning-100k", "project_name": "Tengentoppa-sft-qwen2.5-32b-reasoning-100k", "downloads": 24, "source": "Hugging Face", "score": -0.05193457570653602, "first_commit": "2024-11-03 07:18:52", "latest_commit": "2024-11-03 07:34:14", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Reasoning" ] }, { "description": "For more information, see website below!", "url": "https://huggingface.co/datasets/Hoshikuzu/Tanaka-corpus", "project_name": "Tanaka-corpus", "downloads": 24, "source": "Hugging Face", "score": -0.05193457570653602, "first_commit": "2024-08-24 14:51:15", "latest_commit": "2024-08-25 13:20:34", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "English - Japanese pairs taken from https://tatoeba.org/en/downloads and then deduplicated.", "url": "https://huggingface.co/datasets/Verah/tatoeba_dedupe_en-jp_2024-March-01", "project_name": "tatoeba_dedupe_en-jp_2024-March-01", "downloads": 24, "source": "Hugging Face", "score": -0.05193457570653602, "first_commit": "2024-03-05 13:46:13", "latest_commit": "2024-03-06 08:34:02", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "LoRAデータセット開示用データ。", "url": "https://huggingface.co/datasets/den2nova/den2niji", "project_name": "den2niji", "downloads": 24, "source": "Hugging Face", "score": -0.05193457570653602, "first_commit": "2023-07-04 04:13:58", "latest_commit": "2023-07-07 01:55:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Tagging" ] }, { "description": "Kotoba-Speech-v0.1 Kotoba-Speech v0.1 is a 1.2B Transformer-based speech generative model.", "url": "https://huggingface.co/kotoba-tech/kotoba-speech-v0.1", "project_name": "kotoba-speech-v0.1", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2024-03-14 01:21:58", "latest_commit": "2024-04-17 07:54:48", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models", "Speech & Audio in NLP", "Multimodality" ] }, { "description": "Our Models Vecteus Ninja-v1 Ninja-v1-NSFW Ninja-v1-128k Ninja-v1-NSFW-128k Model Card for Ninja-v1.0 The Mistral-7B--based Large Language Model (LLM) is an noveldataset fine-tuned version of the Mistral-7B-v0.1 Ninja has the following changes compared to Mistral-7B-v0.1.", "url": "https://huggingface.co/Local-Novel-LLM-project/Ninja-v1", "project_name": "Ninja-v1", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2024-04-24 10:28:30", "latest_commit": "2024-05-04 04:07:09", "languages": [], "model_or_dataset": "model", "model_size": 
7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "rinna/nekomata-14b-gguf Overview The model is the GGUF version of rinna/nekomata-14b.", "url": "https://huggingface.co/rinna/nekomata-14b-gguf", "project_name": "nekomata-14b-gguf", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2023-12-19 08:11:51", "latest_commit": "2024-07-20 08:29:58", "languages": [], "model_or_dataset": "model", "model_size": 14.2, "model_architectures": null, "multi_labels": [ "Multilinguality", "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "このモデルはdeberta-v2-base-japaneseをファインチューニングして固有表現抽出(NER)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/deberta-v2-base-japanese-finetuned-ner", "project_name": "deberta-v2-base-japanese-finetuned-ner", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2023-01-20 05:57:37", "latest_commit": "2023-03-27 08:05:06", "languages": [], "model_or_dataset": "model", "model_size": 0.112, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Named Entity Recognition", "Language Models" ] }, { "description": "Japanese GPT2 Lyric Model Model description", "url": "https://huggingface.co/skytnt/gpt2-japanese-lyric-medium", "project_name": "gpt2-japanese-lyric-medium", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2022-07-08 13:28:12", "latest_commit": "2023-10-21 14:53:57", "languages": [], "model_or_dataset": "model", "model_size": 0.361, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "deberta-large-japanese-aozora Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-aozora", "project_name": "deberta-large-japanese-aozora", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2022-05-26 14:46:58", "latest_commit": "2023-01-14 00:27:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "jpn-heb source group: Japanese target group:", "url": "https://huggingface.co/Helsinki-NLP/opus-mt-ja-he", "project_name": "opus-mt-ja-he", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2020-08-19 00:28:58", "latest_commit": "2023-08-16 11:59:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MarianMTModel", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Text Normalization", "Semantic Text Processing" ] }, { "description": "makiart/jp-ModernBert-base-preview このモデルはABCI 生成AIハッカソンにて提供された計算資源によってAlgomaticチームが作成したモデルです。 ", "url": "https://huggingface.co/makiart/jp-ModernBert-base-preview", "project_name": "jp-ModernBert-base-preview", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2025-02-11 11:43:29", "latest_commit": "2025-02-14 09:03:59", "languages": [], "model_or_dataset": "model", "model_size": 0.15, "model_architectures": 
"ModernBertForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "日本語でtrainingしたllama2をinstruction用のデータセットでsftしたものになります base: https://huggingface.co/if001/llama2_ja_small trainingは以下のscript参照 https://github.com/Lightning-AI/lit-gpt/tree/main use from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained(\"if001/sentencepiece_ja\", trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained(\"if001/llama2_ja_small\")", "url": "https://huggingface.co/if001/llama2_ja_small_instruct", "project_name": "llama2_ja_small_instruct", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2023-10-21 05:21:13", "latest_commit": "2023-10-23 19:39:51", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Mixtral-8x7B-v0.1-japanese Mixtral-8x7B-v0.1-japaneseはMixtral-8x7B-v0.1をベースに日本語の語彙拡張継続事前学習を実施したモデルです。", "url": "https://huggingface.co/abeja/Mixtral-8x7B-v0.1-japanese", "project_name": "Mixtral-8x7B-v0.1-japanese", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2024-04-16 03:06:14", "latest_commit": "2024-04-20 09:14:10", "languages": [], "model_or_dataset": "model", "model_size": 46.9, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Model card 英日、日英翻訳用モデルC3TR-AdapterのGPTQ4bit量子化版です。", "url": "https://huggingface.co/webbigdata/C3TR-Adapter_gptq", "project_name": "C3TR-Adapter_gptq", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2024-05-20 13:41:37", "latest_commit": "2024-05-24 07:40:23", "languages": [], "model_or_dataset": "model", "model_size": 1.83, "model_architectures": "GemmaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Ruri-Reranker: Japanese General Reranker Usage Direct Usage (Sentence Transformers)", "url": "https://huggingface.co/cl-nagoya/ruri-reranker-stage1-base", "project_name": "ruri-reranker-stage1-base", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2024-08-19 16:14:12", "latest_commit": "2024-09-04 08:52:18", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "このデータセットはkunishou氏が公開している\"databricks-dolly-15k\"を日本語訳したkunishou/databricks-dolly-15k-jaデータセットの語尾をArrowPro-7B-KUJIRAを用いて「にゃん!", "url": "https://huggingface.co/datasets/DataPilot/databricks-dolly-15k-Nyan-ja", "project_name": "databricks-dolly-15k-Nyan-ja", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2024-05-18 13:03:25", "latest_commit": "2024-05-19 10:24:16", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "SyosetuNames-3.5M: Japanese Light Novel Character Names Corpus Overview This dataset extracts fictional character names from the publicly available text of novels on the Japanese light novel platform \"Shōsetsuka ni Narō\" (syosetu.com),", "url": 
"https://huggingface.co/datasets/Sunbread/SyosetuNames-3.5M", "project_name": "SyosetuNames-3.5M", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2025-01-22 14:17:48", "latest_commit": "2025-01-24 05:17:03", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Term Extraction", "Information Extraction & Text Mining", "Named Entity Recognition", "Annotation and Dataset Development" ] }, { "description": "JSeM: Japanese semantic test suite (Japanese FraCaS and extensions) 叙述文間の含意関係は、言語学においては意味論の中心的な説明対象の一つであるとともに、理論を検証するためのベンチマークとして用いられています。 ", "url": "https://huggingface.co/datasets/zenless-lab/jsem", "project_name": "jsem", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2024-12-18 10:10:11", "latest_commit": "2024-12-19 08:52:15", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Reasoning", "Textual Inference", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "This dataset is a translation of https://huggingface.co/datasets/Abirate/english_quotes into Japanese using the llm-jp/llm-jp-3-3.7b-instruct model.", "url": "https://huggingface.co/datasets/speed/english_quotes_ja", "project_name": "english_quotes_ja", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2024-10-22 14:22:50", "latest_commit": "2024-10-22 14:46:27", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation" ] }, { "description": "LLM-jp Toxicity Dataset 日本語有害文書データセット「LLM-jp Toxicity Dataset」 See https://gitlab.llm-jp.nii.ac.jp/datasets/llm-jp-toxicity-dataset", "url": "https://huggingface.co/datasets/p1atdev/LLM-jp-Toxicity-Dataset", "project_name": "LLM-jp-Toxicity-Dataset", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2024-08-07 07:11:08", "latest_commit": "2024-08-07 07:21:07", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Annotation and Dataset Development" ] }, { "description": "埋め込みモデルの学習、評価のためのクラスタリングデータセットです。 ", "url": "https://huggingface.co/datasets/oshizo/HSClustering-ja", "project_name": "HSClustering-ja", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2024-06-23 12:46:05", "latest_commit": "2024-06-23 15:34:38", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "japanese-stackexchange 英語による日本語に関する質問ができる Japanese Stack Exchange のデータダンプ をもとにデータを加工し、質問文と回答文のペアになるように調整した QA データセット。 ", "url": "https://huggingface.co/datasets/p1atdev/japanese-stackexchange", "project_name": "japanese-stackexchange", "downloads": 23, "source": "Hugging Face", "score": -0.051941674832230535, "first_commit": "2023-12-16 03:14:00", "latest_commit": "2023-12-21 05:30:37", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Annotation and Dataset Development" ] }, { "description": "wav2vec2-live-japanese 
https://github.com/ttop32/wav2vec2-live-japanese-translator Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese hiragana using the common_voice, JSUT, and CSS10 datasets", "url": "https://huggingface.co/ttop324/wav2vec2-live-japanese", "project_name": "wav2vec2-live-japanese", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2021-10-26 14:51:21", "latest_commit": "2021-10-31 15:34:55", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "deberta-large-japanese-wikipedia Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-wikipedia", "project_name": "deberta-large-japanese-wikipedia", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2022-07-05 22:01:16", "latest_commit": "2023-02-27 10:15:35", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "deberta-base-japanese-aozora Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-aozora", "project_name": "deberta-base-japanese-aozora", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2022-05-24 04:30:28", "latest_commit": "2023-01-08 08:41:04", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "zenz-v2 zenz-v2はGPT-2アーキテクチャに基づくかな漢字変換タスクに特化した言語モデルです。", "url": "https://huggingface.co/Miwa-Keita/zenz-v2-gguf", "project_name": "zenz-v2-gguf", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2024-07-31 15:15:16", "latest_commit": "2024-08-04 09:35:48", "languages": [], "model_or_dataset": "model", "model_size": 0.09509999999999999, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "モデルの説明(English explanation is below.", "url": "https://huggingface.co/keitokei1994/Llama-3-Umievo-Shizuko-sqlcoder-2x8B", "project_name": "Llama-3-Umievo-Shizuko-sqlcoder-2x8B", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2024-06-09 12:17:00", "latest_commit": "2024-06-11 07:39:45", "languages": [], "model_or_dataset": "model", "model_size": 13.7, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Multilinguality", "Responsible & Trustworthy NLP", "Text Generation", "Programming Languages in NLP", "Multimodality", "Explainability & Interpretability in NLP" ] }, { "description": "LayoutLM-wikipedia-ja Model", "url": "https://huggingface.co/jri-advtechlab/layoutlm-wikipedia-ja", "project_name": "layoutlm-wikipedia-ja", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2024-01-31 01:50:23", "latest_commit": "2024-01-31 11:08:03", "languages": [], "model_or_dataset": "model", "model_size": 0.114, "model_architectures": "LayoutLMForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "COMET-GPT2
ja Finetuned GPT-2 on ATOMIC ja using a causal language modeling (CLM) objective.", "url": "https://huggingface.co/nlp-waseda/comet-gpt2-small-japanese", "project_name": "comet-gpt2-small-japanese", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2022-11-15 05:14:35", "latest_commit": "2023-02-13 10:26:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Dialogue Response Generation", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "transformers-ud-japanese-electra-ginza (sudachitra-wordpiece, mC4 Japanese) -", "url": "https://huggingface.co/megagonlabs/transformers-ud-japanese-electra-base-discriminator", "project_name": "transformers-ud-japanese-electra-base-discriminator", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2021-08-23 09:54:09", "latest_commit": "2021-09-22 11:00:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForPreTraining", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Riga_collectionとは? ", "url": "https://huggingface.co/natsusakiyomi/Riga_Collection", "project_name": "Riga_Collection", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2023-03-18 17:40:43", "latest_commit": "2023-03-24 16:13:45", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "実験モデルです。", "url": "https://huggingface.co/Akimite/Qwen2-7b-Instruct-Boku-v2", "project_name": "Qwen2-7b-Instruct-Boku-v2", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2024-06-13 03:23:05", "latest_commit": "2024-06-15 14:58:10", "languages": [], "model_or_dataset": "model", "model_size": 7.62, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "The JaNLI (Japanese Adversarial NLI) dataset, inspired by the English HANS dataset, is designed to necessitate an understanding of Japanese linguistic phenomena and to illuminate the vulnerabilities of models.", "url": "https://huggingface.co/datasets/hpprc/janli", "project_name": "janli", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2023-04-05 12:25:01", "latest_commit": "2023-04-11 13:40:37", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Robustness in NLP", "Annotation and Dataset Development" ] }, { "description": "mqaデータセットのquery--passageのペアについて重複を削除したデータセットです。 ", "url": "https://huggingface.co/datasets/hpprc/mqa-ja", "project_name": "mqa-ja", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2024-04-07 06:23:02", "latest_commit": "2024-04-07 15:16:42", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Bluemoon_Top50MB_Sorted_Fixed_ja 
SicariusSicariiStuff/Bluemoon_Top50MB_Sorted_Fixedを、GENIAC-Team-Ozaki/karakuri-lm-8x7b-chat-v0.1-awqを用いて日本語に翻訳したロールプレイ学習用データセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Bluemoon_Top50MB_Sorted_Fixed_ja", "project_name": "Bluemoon_Top50MB_Sorted_Fixed_ja", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2024-05-18 15:35:17", "latest_commit": "2024-05-18 15:38:35", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Synthetic-JP-Conversations-Magpie-Nemotron-4-10k Magpieの手法をnvidia/Nemotron-4-340B-Instructに対して適用し作成した、約10000件の日本語instruction tuning用データセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Synthetic-JP-Conversations-Magpie-Nemotron-4-10k", "project_name": "Synthetic-JP-Conversations-Magpie-Nemotron-4-10k", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2024-07-05 13:53:45", "latest_commit": "2024-07-05 13:57:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "日本語指示・推論・回答データセット 概要 このリポジトリは、SkunkworksAI/reasoning-0.01 に含まれるインストラクションデータを基に、Qwen/Qwen2.5-32B-Instruct モデルを用いて作成した日本語版の指示・推論・回答データセットです。", "url": "https://huggingface.co/datasets/DeL-TaiseiOzaki/Tengentoppa-sft-reasoning-ja", "project_name": "Tengentoppa-sft-reasoning-ja", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2024-10-12 15:26:10", "latest_commit": "2024-10-12 15:45:43", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Reasoning", "Annotation and Dataset Development" ] }, { "description": "長文からの要約データセットです。 ", "url": "https://huggingface.co/datasets/aixsatoshi/Longcontext-aozora-summary", "project_name": "Longcontext-aozora-summary", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2024-03-29 11:09:15", "latest_commit": "2024-03-30 10:55:37", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "wiki40b-ja から生成した質問応答データセット", "url": "https://huggingface.co/datasets/U23-lab/wiki40b_qa_ja", "project_name": "wiki40b_qa_ja", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2024-03-28 11:17:17", "latest_commit": "2024-03-28 11:45:07", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Fine-tuned XLSR-53 large model for speech recognition in Japanese Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the train and validation splits of Common Voice 6.1, CSS10 and JSUT.", "url": "https://huggingface.co/datasets/Gustav114514/work", "project_name": "work", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2024-01-11 05:39:33", "latest_commit": "2024-01-11 05:52:36", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ 
"Representation Learning", "Text Generation", "Language Models", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "oasst1-ja Description Based on OpenAssistant Conversations Dataset (OASST1)", "url": "https://huggingface.co/datasets/tombailey/oasst1-ja", "project_name": "oasst1-ja", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2023-07-24 14:35:00", "latest_commit": "2023-07-26 11:32:45", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents", "Annotation and Dataset Development" ] }, { "description": "Dataset Summary From the official README.md: CAMERA (CyberAgent Multimodal Evaluation for Ad Text GeneRAtion) is the Japanese ad text generation dataset.", "url": "https://huggingface.co/datasets/creative-graphic-design/CAMERA", "project_name": "CAMERA", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2023-03-17 23:02:32", "latest_commit": "2023-03-17 23:49:35", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Generation", "Annotation and Dataset Development" ] }, { "description": "The following data set was vectorized with the intfloat/multilingual-e5-base model and an index file created by faiss.", "url": "https://huggingface.co/datasets/oshizo/japanese-wikipedia-paragraphs-embeddings", "project_name": "japanese-wikipedia-paragraphs-embeddings", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2023-12-10 03:41:14", "latest_commit": "2023-12-15 13:16:42", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Representation Learning", "Semantic Text Processing" ] }, { "description": "This dataset is based on the Japanese version of Wikipedia dataset and converted into a multi-turn conversation format using llama2Pro8B. 
", "url": "https://huggingface.co/datasets/shi3z/ja_conv_wikipedia_llama2pro8b_30k", "project_name": "ja_conv_wikipedia_llama2pro8b_30k", "downloads": 22, "source": "Hugging Face", "score": -0.05194877395792504, "first_commit": "2024-01-21 22:14:41", "latest_commit": "2024-01-21 22:16:01", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "japanese-sexual-moderation-v2は、studio-ousia/luke-japanese-large-liteをファインチューニングしたモデルです。", "url": "https://huggingface.co/oshizo/japanese-sexual-moderation-v2", "project_name": "japanese-sexual-moderation-v2", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2024-01-03 04:58:17", "latest_commit": "2024-01-03 07:09:05", "languages": [], "model_or_dataset": "model", "model_size": 0.41400000000000003, "model_architectures": "LukeForSequenceClassification", "multi_labels": [ "Information Extraction & Text Mining" ] }, { "description": "概要 このモデルはllama3.1-8B-instructをもとに日本語性能を高めることを目的にMergekit&ファインチューニングを用いて作成されました。 ", "url": "https://huggingface.co/DataPilot/Llama3.1-ArrowSE-v0.4", "project_name": "Llama3.1-ArrowSE-v0.4", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2024-07-24 07:37:16", "latest_commit": "2024-07-24 12:00:46", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "deberta-large-japanese-unidic Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-unidic", "project_name": "deberta-large-japanese-unidic", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2022-06-10 12:49:12", "latest_commit": "2022-06-19 09:15:35", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "Model Card for Japanese character-level GPT-2 Medium Model description This is a Japanese character-level GPT-2 Medium (310M parameters) language model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/ku-nlp/gpt2-medium-japanese-char", "project_name": "gpt2-medium-japanese-char", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2023-05-18 06:29:28", "latest_commit": "2023-06-08 05:34:26", "languages": [], "model_or_dataset": "model", "model_size": 0.335, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Heron BLIP Japanese StableLM", "url": "https://huggingface.co/turing-motors/heron-chat-blip-ja-stablelm-base-7b-v0", "project_name": "heron-chat-blip-ja-stablelm-base-7b-v0", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2023-09-06 09:31:44", "latest_commit": "2023-09-07 16:59:14", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "VideoBlipForConditionalGeneration", "multi_labels": [ "Visual Data in NLP", "Language Models" ] }, { 
"description": "rinna/nekomata-7b-gguf Overview The model is the GGUF version of rinna/nekomata-7b.", "url": "https://huggingface.co/rinna/nekomata-7b-gguf", "project_name": "nekomata-7b-gguf", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2023-12-19 08:10:42", "latest_commit": "2024-07-20 08:36:15", "languages": [], "model_or_dataset": "model", "model_size": 7.72, "model_architectures": null, "multi_labels": [ "Multilinguality", "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-base-short", "project_name": "t5-base-short", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2023-04-26 08:20:52", "latest_commit": "2023-05-10 10:00:23", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "roberta-small-japanese-aozora Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-small-japanese-aozora", "project_name": "roberta-small-japanese-aozora", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2021-11-02 12:54:50", "latest_commit": "2021-11-03 23:44:50", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "mlx-community/DeepSeek-R1-Distill-Qwen-32B-Japanese-8bit The Model mlx-community/DeepSeek-R1-Distill-Qwen-32B-Japanese-8bit was converted to MLX format from cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese using mlx-lm version 0.21.1.", "url": "https://huggingface.co/mlx-community/DeepSeek-R1-Distill-Qwen-32B-Japanese-8bit", "project_name": "DeepSeek-R1-Distill-Qwen-32B-Japanese-8bit", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2025-01-27 14:24:07", "latest_commit": "2025-01-27 15:27:19", "languages": [], "model_or_dataset": "model", "model_size": 9.22, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Responsible & Trustworthy NLP", "Text Segmentation", "Language Models", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "zenz-v2.5-small zenz-v2.5はかな漢字変換タスクに特化したGPT-2アーキテクチャの条件付き言語モデルです。", "url": "https://huggingface.co/Miwa-Keita/zenz-v2.5-xsmall", "project_name": "zenz-v2.5-xsmall", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2025-01-13 07:44:03", "latest_commit": "2025-01-13 16:13:06", "languages": [], "model_or_dataset": "model", "model_size": 0.0225, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "kishizaki-sci/phi-4-AWQ-4bit-EN-JP model information phi-4をAutoAWQで4bit 量子化したモデル。", "url": "https://huggingface.co/kishizaki-sci/phi-4-AWQ-4bit-EN-JP", "project_name": "phi-4-AWQ-4bit-EN-JP", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2025-01-10 11:48:09", "latest_commit": "2025-01-10 14:26:47", "languages": [], "model_or_dataset": "model", "model_size": 2.85, "model_architectures": "Phi3ForCausalLM", 
"multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "datasets: https://github.com/ids-cv/wrime/raw/master/wrime-ver1.tsv", "url": "https://huggingface.co/kynea0b/cl-tohoku-bert-base-japanese-v3-wrime-8-emotions", "project_name": "cl-tohoku-bert-base-japanese-v3-wrime-8-emotions", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "モデル概要 Watashiha-Llama-2-13B-Ogiri-sftをLLaVAで学習し、画像に対応した大喜利言語モデルです。", "url": "https://huggingface.co/watashiha/Watashiha-Llama-2-13B-Ogiri-sft-vlm", "project_name": "Watashiha-Llama-2-13B-Ogiri-sft-vlm", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2024-02-26 17:55:56", "latest_commit": "2024-03-04 05:22:43", "languages": [], "model_or_dataset": "model", "model_size": 13.3, "model_architectures": "LlavaLlamaForCausalLM", "multi_labels": [ "Text Generation", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "kakuyomu-genre-bert 小説のタイトルや紹介文からジャンルを分類する BERT 東北大の cl-tohoku/bert-base-japanese-char-v3 をベースにファインチューンされました。", "url": "https://huggingface.co/p1atdev/kakuyomu-genre-bert", "project_name": "kakuyomu-genre-bert", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2023-09-22 06:28:13", "latest_commit": "2023-09-22 07:08:46", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "ryota39様の Tora-7B-v0.2 をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Tora-7B-v0.2-GGUF", "project_name": "Tora-7B-v0.2-GGUF", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2024-05-06 12:50:49", "latest_commit": "2024-06-15 03:17:32", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "What is this? ", "url": "https://huggingface.co/grapevine-AI/sarashina2-70b-gguf", "project_name": "sarashina2-70b-gguf", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2024-10-27 15:09:05", "latest_commit": "2024-10-27 23:41:11", "languages": [], "model_or_dataset": "model", "model_size": 70.1, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Ruri: Japanese General Text Embeddings Usage First install the Sentence Transformers library: pip install -U sentence-transformers Then you can load this model and run inference.", "url": "https://huggingface.co/cl-nagoya/ruri-pt-small", "project_name": "ruri-pt-small", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2024-08-17 10:39:05", "latest_commit": "2024-08-30 03:11:20", "languages": [], "model_or_dataset": "model", "model_size": 0.0681, "model_architectures": "DistilBertModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "AnzuMixSeries VAEの内臓はないぞ!と言わせないぞ!!!! 
", "url": "https://huggingface.co/natsusakiyomi/AnzuMix", "project_name": "AnzuMix", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2023-07-30 13:10:10", "latest_commit": "2023-11-15 12:39:10", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Oumuamua-7b-instruct This is a merge of pre-trained language models created using mergekit. ", "url": "https://huggingface.co/nitky/Oumuamua-7b-instruct", "project_name": "Oumuamua-7b-instruct", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2024-06-01 10:40:37", "latest_commit": "2024-06-01 15:55:51", "languages": [], "model_or_dataset": "model", "model_size": 7.33, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "HF Datasets version of Tanaka Corpus.", "url": "https://huggingface.co/datasets/hpprc/tanaka-corpus", "project_name": "tanaka-corpus", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2024-03-21 01:49:50", "latest_commit": "2024-03-21 12:50:28", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Structured Data in NLP", "Annotation and Dataset Development" ] }, { "description": "自動生成Q&A 種々のデータソースから、MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-GGUFを使ってQ&Aを自動生成したものです。 CC-BY系またはApatch-2.0のデータソースを改変して生成しています。 ", "url": "https://huggingface.co/datasets/hatakeyama-llm-team/AutoGeneratedJapaneseQA", "project_name": "AutoGeneratedJapaneseQA", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2024-04-27 04:32:20", "latest_commit": "2024-05-19 03:22:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Text Error Correction" ] }, { "description": "Synthetic-JP-Coding-Dataset-Magpie-Nemotron-4-10k Magpieの手法をnvidia/Nemotron-4-340B-Instructに対して適用し作成した、約10000件の日本語のコーディング用対話データセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Synthetic-JP-Coding-Dataset-Magpie-Nemotron-4-10k", "project_name": "Synthetic-JP-Coding-Dataset-Magpie-Nemotron-4-10k", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2024-07-06 06:53:27", "latest_commit": "2024-07-06 06:57:57", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "This dataset is the data that corrected the translation errors and untranslated data of the Japanese data in MBZUAI/multilingual-llava-bench-in-the-wild.", "url": "https://huggingface.co/datasets/toshi456/llava-bench-in-the-wild-ja", "project_name": "llava-bench-in-the-wild-ja", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": "2024-03-06 21:56:53", "latest_commit": "2024-04-01 15:15:57", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "時事情報に関する日本語QAベンチマーク『ニュースQ』はHugging Faceにて無償で配布します。", "url": 
"https://huggingface.co/datasets/asahi-research/newsq_test", "project_name": "newsq_test", "downloads": 21, "source": "Hugging Face", "score": -0.051955873083619555, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "Model Card for Japanese DeBERTa V2 base Model description This is a Japanese DeBERTa V2 base model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/G-Root/deberta-v2-base-japanese", "project_name": "deberta-v2-base-japanese", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2023-09-14 10:06:00", "latest_commit": "2023-09-14 17:24:52", "languages": [], "model_or_dataset": "model", "model_size": 0.137, "model_architectures": "DebertaV2ForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "Model Card for Japanese character-level GPT-2 Large Model description", "url": "https://huggingface.co/ku-nlp/gpt2-large-japanese-char", "project_name": "gpt2-large-japanese-char", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2023-12-27 11:18:45", "latest_commit": "2023-12-27 12:07:30", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "更新履歴 2023年5月7日 「oasst1-89k-ja」データセットを追加して対話システムに対応しました。", "url": "https://huggingface.co/inu-ai/dolly-japanese-gpt-1b", "project_name": "dolly-japanese-gpt-1b", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2023-04-13 22:46:07", "latest_commit": "2023-08-01 07:55:27", "languages": [], "model_or_dataset": "model", "model_size": 1.33, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "このモデルはluke-japanese-baseをファインチューニングして、JSTS(文章の類似度計算)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-base-finetuned-jsts", "project_name": "luke-japanese-base-finetuned-jsts", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2023-02-10 06:41:43", "latest_commit": "2023-12-12 06:58:59", "languages": [], "model_or_dataset": "model", "model_size": 0.279, "model_architectures": "LukeForSequenceClassification", "multi_labels": [ "Language Models", "Semantic Similarity", "Semantic Text Processing" ] }, { "description": "モデル説明 (model explanation) V1 = MoeDiffusion 1.0 + (HassanBlend 1.5 - VMix03) * 0.2 V2 = MoeDiffusion 0.6 : HassanBlend 1.5 0.2 : VMix03 : 0.2 マージ元のルーツにNAIリークやInsta系モデルが含まれるという噂があるので、NAIリークアンチ・Insta系モデルアンチには非推奨 理想の黒髪ポニテ顔が出せるYaguruMagikuを、ある程度顔が近くて制御しやすいAbyssOrangeMix2と混ぜてみた。 ", "url": "https://huggingface.co/ThePioneer/MoeDiffusionPlusPlus", "project_name": "MoeDiffusionPlusPlus", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2023-01-19 13:04:02", "latest_commit": "2023-01-21 02:05:54", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Explainability & Interpretability in NLP" ] }, { "description": 
"Llama-3.3-Swallow-70B-Instruct-v0.4-GGUF base_model: tokyotech-llm/Llama-3.3-Swallow-70B-Instruct-v0.4 imatrix: TFMC/imatrix-dataset-for-japanese-llm", "url": "https://huggingface.co/yasu-oh/Llama-3.3-Swallow-70B-Instruct-v0.4-GGUF", "project_name": "Llama-3.3-Swallow-70B-Instruct-v0.4-GGUF", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2025-03-11 00:55:33", "latest_commit": "2025-03-11 10:08:16", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "このモデルはdeberta-v2-tiny-japaneseをファインチューニングしてCommonsenseQA(選択式の質問)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/deberta-v2-japanese-tiny-finetuned-commonsenseqa", "project_name": "deberta-v2-japanese-tiny-finetuned-commonsenseqa", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2023-05-11 10:28:33", "latest_commit": "2023-05-26 15:01:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMultipleChoice", "multi_labels": [ "Reasoning", "Language Models", "Commonsense Reasoning", "Semantic Text Processing" ] }, { "description": "このモデルはcl-tohoku/bert-large-japanese-v2をファインチューニングしてCommonsenseQA(選択式の質問)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/bert-large-japanese-v2-finetuned-commonsenseQA", "project_name": "bert-large-japanese-v2-finetuned-commonsenseQA", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2023-05-26 14:33:37", "latest_commit": "2023-05-26 15:02:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMultipleChoice", "multi_labels": [ "Reasoning", "Language Models", "Commonsense Reasoning", "Semantic Text Processing" ] }, { "description": "Details: https://spacy.io/models/ja#ja_core_news_trf Japanese transformer pipeline (Transformer(name='cl-tohoku/bert-base-japanese-char-v2', piece_encoder='char', stride=160, type='bert', width=768, window=216, vocab_size=6144)).", "url": "https://huggingface.co/spacy/ja_core_news_trf", "project_name": "ja_core_news_trf", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2021-11-09 16:58:01", "latest_commit": "2023-10-10 06:27:03", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Named Entity Recognition", "Language Models", "Semantic Text Processing" ] }, { "description": "2025 年のエイプリールフールネタ枠でした 🎉🎉 Saikyou Shield 30M 🎉🎉 🔥 危険なプロンプトを100%検出できる最強の分類モデル 🔥 Jailbreak やプロンプトインジェクションを含む、あらゆるプロンプトを危険と分類することができます!! 
", "url": "https://huggingface.co/p1atdev/saikyou-shield-30m", "project_name": "saikyou-shield-30m", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2025-03-31 18:08:03", "latest_commit": "2025-04-01 17:51:03", "languages": [], "model_or_dataset": "model", "model_size": 0.0368, "model_architectures": "ModernBertForSequenceClassification", "multi_labels": [] }, { "description": "AIBunCho様の公開モデル (https://huggingface.co/AIBunCho/japanese-novel-gpt-j-6b)", "url": "https://huggingface.co/sehiro/AI-buncho-novel-ct2", "project_name": "AI-buncho-novel-ct2", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2023-08-26 22:53:50", "latest_commit": "2023-08-26 23:17:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "japanese-novel-gpt-j-6b https://huggingface.co/AIBunCho/japanese-novel-gpt-j-6b\" に合計216個の評価の高いなろう小説、青空文庫、ウィキペディアなどの文章をQLoRA学習させた小説生成用モデルです。 ", "url": "https://huggingface.co/akineAItech/Jeneri-SAMA-6B", "project_name": "Jeneri-SAMA-6B", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-02-25 10:30:06", "latest_commit": "2024-03-16 15:00:14", "languages": [], "model_or_dataset": "model", "model_size": 6.05, "model_architectures": "GPTJForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "jvnvコーパスのF2から学習して作成したbert-vits2の日本語モデルです。 ", "url": "https://huggingface.co/yasyune/bert_vits2_2.1_jvnv", "project_name": "bert_vits2_2.1_jvnv", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2023-12-09 02:37:46", "latest_commit": "2023-12-12 07:58:31", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-base-beta-70B-AWQ", "project_name": "japanese-stablelm-base-beta-70B-AWQ", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2023-11-06 11:33:47", "latest_commit": "2023-11-09 18:16:05", "languages": [], "model_or_dataset": "model", "model_size": 9.68, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "更新情報 日本語機能とinstructベクトルのバランス調整したver.2をアップロードしましたSwallow-MX-8x7b-NVE-chatvector-Mixtral-instruct-v2 モデル概要 Swallow-MX-8x7b-NVE-v0.1に対し、 Mixtral-8x7B-Instruct-v0.1とMixtral-8x7B-v0.1の差分をマージしたモデルです。 ", "url": "https://huggingface.co/aixsatoshi/Swallow-MX-8x7b-NVE-chatvector-Mixtral-instruct", "project_name": "Swallow-MX-8x7b-NVE-chatvector-Mixtral-instruct", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-03-20 16:15:26", "latest_commit": "2024-03-23 04:14:49", "languages": [], "model_or_dataset": "model", "model_size": 46.7, "model_architectures": "MixtralForCausalLM", "multi_labels": [] }, { "description": "Mixtral-8x7B-Instruct-v0.1-japanese-alpha-merged Mixtral-8x7B-Instruct-v0.1-japanese-alpha-mergedはMixtral-8x7B-Instruct-v0.1をベースに日本語の語彙拡張継続事前学習を実施した学習途中のモデルに対して、差分マージを実施したモデルです。", "url": 
"https://huggingface.co/abeja/Mixtral-8x7B-Instruct-v0.1-japanese-alpha-merged", "project_name": "Mixtral-8x7B-Instruct-v0.1-japanese-alpha-merged", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-04-16 07:54:14", "latest_commit": "2024-04-20 09:14:59", "languages": [], "model_or_dataset": "model", "model_size": 46.9, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-2-base-1_6b", "project_name": "japanese-stablelm-2-base-1_6b", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 1.64, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ELYZA-japanese-Llama-2-fast-MoE-2x7B-v0.1-GGUF 概要 Aratako/ELYZA-japanese-Llama-2-fast-MoE-2x7B-v0.1の量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/ELYZA-japanese-Llama-2-fast-MoE-2x7B-v0.1-GGUF", "project_name": "ELYZA-japanese-Llama-2-fast-MoE-2x7B-v0.1-GGUF", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-03-07 13:21:38", "latest_commit": "2024-03-07 13:47:58", "languages": [], "model_or_dataset": "model", "model_size": 11.2, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "[EZO model card]", "url": "https://huggingface.co/AXCXEPT/EZO-InternVL2-26B", "project_name": "EZO-InternVL2-26B", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-08-19 08:03:55", "latest_commit": "2024-08-23 10:56:47", "languages": [], "model_or_dataset": "model", "model_size": 25.5, "model_architectures": "InternVLChatModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "sehiro/EvoLLM-JP-A-v1-7B-IQ4_XS-GGUF", "url": "https://huggingface.co/sehiro/EvoLLM-JP-A-v1-7B-IQ4_XS-GGUF", "project_name": "EvoLLM-JP-A-v1-7B-IQ4_XS-GGUF", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-09-15 03:54:05", "latest_commit": "2024-09-15 03:54:23", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "mlx-community/Llama-3.1-70B-Japanese-Instruct-2407-4bit", "url": "https://huggingface.co/mlx-community/Llama-3.1-70B-Japanese-Instruct-2407-4bit", "project_name": "Llama-3.1-70B-Japanese-Instruct-2407-4bit", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-07-26 21:06:54", "latest_commit": "2024-07-26 21:37:02", "languages": [], "model_or_dataset": "model", "model_size": 11.0, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Kanji Diffusion v1-4 Model Card Kanji Diffusion is a latent text-to-image diffusion model capable of hallucinating Kanji characters given any English prompt.", "url": "https://huggingface.co/yashvoladoddi37/kanji-diffusion-v1-4", "project_name": "kanji-diffusion-v1-4", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, 
"first_commit": "2024-08-13 06:06:21", "latest_commit": "2024-08-16 12:14:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Model Overview: 日本語で質問すると、日本語で回答を得られます。", "url": "https://huggingface.co/Ryu-m0m/16bit-japanese-finetuned-mistral-7b-v0", "project_name": "16bit-japanese-finetuned-mistral-7b-v0", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-08-20 14:33:15", "latest_commit": "2024-08-21 15:31:43", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "This is my conversion of NilanE/ParallelFiction-Ja_En-100k into json which can be read by text-generation-webui when training a model.", "url": "https://huggingface.co/datasets/mpasila/ParallelFiction-Ja_En-100k-json", "project_name": "ParallelFiction-Ja_En-100k-json", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-03-26 06:08:27", "latest_commit": "2024-04-02 04:46:10", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": ", 2023) was trained on.", "url": "https://huggingface.co/datasets/zan/lima-ja", "project_name": "lima-ja", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2023-07-08 07:35:34", "latest_commit": "2023-07-08 13:39:45", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "This dataset is a subset of the Open Assistant dataset, which contains Japanese conversations only.", "url": "https://huggingface.co/datasets/masajek/openassistant-guanaco-ja", "project_name": "openassistant-guanaco-ja", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2023-08-15 15:38:34", "latest_commit": "2023-08-17 15:36:00", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Dataset Summary JMultiWOZ is a large-scale Japanese multi-domain task-oriented dialogue dataset.", "url": "https://huggingface.co/datasets/nu-dialogue/jmultiwoz", "project_name": "jmultiwoz", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-02-29 15:38:29", "latest_commit": "2024-03-13 02:15:37", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "日本郵便が提供する「国際郵便 内容品の日英・中英訳、HSコード類」(2024/05/09)のデータに基づいています。 ", "url": "https://huggingface.co/datasets/ikeno-ada/Japanese-English_translation_of_contents_HScodes", "project_name": "Japanese-English_translation_of_contents_HScodes", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-05-09 09:29:48", 
"latest_commit": "2024-05-09 09:37:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "The paper of GIELLM dataset.", "url": "https://huggingface.co/datasets/ganchengguang/Text-Classification-and-Relation-Event-Extraction-Mix-datasets", "project_name": "Text-Classification-and-Relation-Event-Extraction-Mix-datasets", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-10-18 15:02:08", "latest_commit": "2024-10-18 15:05:42", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Term Extraction", "Information Extraction & Text Mining" ] }, { "description": "calm3-22bを使って簡単な日本語の例文を作成したデータセットです。 ", "url": "https://huggingface.co/datasets/if001/elementray_l", "project_name": "elementray_l", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-10-15 15:31:38", "latest_commit": "2024-10-21 13:15:01", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "This dataset was created using AI Gemini 2.0 Flash Experimental from the original subtitle format.", "url": "https://huggingface.co/datasets/Aihometr/anime-your-name", "project_name": "anime-your-name", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2025-01-01 18:07:19", "latest_commit": "2025-01-01 20:29:41", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "Qwen/Qwen2.5-32B-Instruct-AWQで生成した3ターンのマルチターンinstructionデータセットです。 ", "url": "https://huggingface.co/datasets/Kendamarron/multiturn-qwen2.5-32b", "project_name": "multiturn-qwen2.5-32b", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-11-11 07:28:24", "latest_commit": "2024-11-16 04:33:18", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "description public RLHF dataset in Japanese the construction of the reward model was reformatted into a classification task.", "url": "https://huggingface.co/datasets/ryota39/open_preference-v0.3", "project_name": "open_preference-v0.3", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-06-25 08:55:53", "latest_commit": "2024-07-04 12:55:42", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Information Retrieval", "Text Classification" ] }, { "description": "埋め込みモデルの学習、評価のためのクラスタリングデータセットです。 ", "url": "https://huggingface.co/datasets/oshizo/JMDNClustering-ja", "project_name": "JMDNClustering-ja", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-06-23 15:17:04", "latest_commit": "2024-06-23 15:35:29", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset 
Development" ] }, { "description": "This is a modified version of NilanE/ParallelFiction-Ja_En-100k which has been turned into Alpaca format.", "url": "https://huggingface.co/datasets/mpasila/ParallelFiction-Ja_En-100k-alpaca-4k-context", "project_name": "ParallelFiction-Ja_En-100k-alpaca-4k-context", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-04-18 11:02:42", "latest_commit": "2024-04-18 11:36:43", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "This dataset is based on the Japanese version of Wikipedia dataset and converted into a multi-turn conversation format using llama2Pro8B. ", "url": "https://huggingface.co/datasets/shi3z/ja_conv_wikipedia_llama2pro8b_3k", "project_name": "ja_conv_wikipedia_llama2pro8b_3k", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-01-08 23:11:22", "latest_commit": "2024-01-09 23:18:17", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Dialogue Systems & Conversational Agents", "Annotation and Dataset Development" ] }, { "description": "データ制作者(t_w)", "url": "https://huggingface.co/datasets/tzmtwtr/tw-posts-japanese", "project_name": "tw-posts-japanese", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2023-06-13 14:31:25", "latest_commit": "2023-06-16 05:17:23", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "Dataset Details Dataset Type:Japanese LLaVA v1.5", "url": "https://huggingface.co/datasets/turing-motors/LLaVA-v1.5-Instruct-620K-JA", "project_name": "LLaVA-v1.5-Instruct-620K-JA", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": "2024-04-10 05:04:58", "latest_commit": "2024-04-12 09:18:42", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "CC-MAIN-2019-51へようこそ 本データセットはCommonCrawlerと呼ばれるものから日本語のみを抽出したものです。 ", "url": "https://huggingface.co/datasets/cc-clean/CC-MAIN-2019-51", "project_name": "CC-MAIN-2019-51", "downloads": 20, "source": "Hugging Face", "score": -0.05196297220931407, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "roberta-base-japanese-char-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-base-japanese-char-luw-upos", "project_name": "roberta-base-japanese-char-luw-upos", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2021-12-28 05:01:56", "latest_commit": "2024-08-20 18:21:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Information Retrieval", "Syntactic Parsing", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "BERT base Japanese model 
This repository contains a BERT base model trained on Japanese Wikipedia dataset.", "url": "https://huggingface.co/colorfulscoop/bert-base-ja", "project_name": "bert-base-ja", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2021-07-30 10:11:35", "latest_commit": "2021-09-23 15:46:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForPreTraining", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "nlp-waseda/gpt2-small-japanese-wikipedia This model is Japanese GPT-2 pretrained on Japanese Wikipedia.", "url": "https://huggingface.co/nlp-waseda/gpt2-small-japanese-wikipedia", "project_name": "gpt2-small-japanese-wikipedia", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2021-12-28 01:22:40", "latest_commit": "2021-12-28 15:31:38", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "deberta-base-japanese-unidic Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-unidic", "project_name": "deberta-base-japanese-unidic", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2022-06-08 08:05:33", "latest_commit": "2022-06-18 23:02:31", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "fasttext-jp-embedding This model is experimental.", "url": "https://huggingface.co/paulhindemith/fasttext-jp-embedding", "project_name": "fasttext-jp-embedding", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2022-11-05 11:14:51", "latest_commit": "2022-11-16 22:21:49", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "FastTextJpModel", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-large-medium", "project_name": "t5-large-medium", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2023-04-26 08:31:45", "latest_commit": "2023-05-10 10:00:45", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "japanese-large-lm-1.7b-instruction-sft-4bit-128g-actorder_False", "url": "https://huggingface.co/line-corporation/japanese-large-lm-1.7b-instruction-sft-4bit-128g-actorder_False", "project_name": "japanese-large-lm-1.7b-instruction-sft-4bit-128g-actorder_False", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2023-09-26 06:15:16", "latest_commit": "2023-09-29 03:19:23", "languages": [], "model_or_dataset": "model", "model_size": 0.446, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models" ] }, { "description": "mlx-community/DeepSeek-R1-Distill-Qwen-32B-Japanese-4bit The Model 
mlx-community/DeepSeek-R1-Distill-Qwen-32B-Japanese-4bit was converted to MLX format from cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese using mlx-lm version 0.21.1.", "url": "https://huggingface.co/mlx-community/DeepSeek-R1-Distill-Qwen-32B-Japanese-4bit", "project_name": "DeepSeek-R1-Distill-Qwen-32B-Japanese-4bit", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2025-01-27 16:18:47", "latest_commit": "2025-01-27 16:47:10", "languages": [], "model_or_dataset": "model", "model_size": 5.12, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Responsible & Trustworthy NLP", "Text Segmentation", "Language Models", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "概要 vecteusは、高性能な日本語大規模言語モデルです。", "url": "https://huggingface.co/Local-Novel-LLM-project/Vecteus-v1-abliterated", "project_name": "Vecteus-v1-abliterated", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Representation Learning" ] }, { "description": "Fine-tuned Japanese Wav2Vec2 model for speech recognition using XLSR-53 large Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using Common Voice, JVS and JSUT.", "url": "https://huggingface.co/Ivydata/wav2vec2-large-xlsr-53-japanese", "project_name": "wav2vec2-large-xlsr-53-japanese", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2023-05-11 08:47:29", "latest_commit": "2023-05-12 02:15:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ "Representation Learning", "Language Models", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "makiart/jp-modernbert-large-preview このモデルはABCI 生成AIハッカソンにて提供された計算資源によってAlgomaticチームが作成したモデルです。 ", "url": "https://huggingface.co/makiart/jp-ModernBERT-large-preview", "project_name": "jp-ModernBERT-large-preview", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2025-02-11 13:10:27", "latest_commit": "2025-02-14 09:03:52", "languages": [], "model_or_dataset": "model", "model_size": 0.396, "model_architectures": "ModernBertForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "日本語でtrainingしたllama2 model size: 417.12M trainingは以下のscript参照https://github.com/Lightning-AI/lit-gpt/tree/main use from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained(\"if001/sentencepiece_ja\", trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained(\"if001/llama2_ja_small\")", "url": "https://huggingface.co/if001/llama2_ja_small", "project_name": "llama2_ja_small", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2023-10-11 09:11:41", "latest_commit": "2023-10-14 13:50:54", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "deberta-base-japanese-unidic-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-unidic-luw-upos", "project_name": 
"deberta-base-japanese-unidic-luw-upos", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2022-06-08 08:26:25", "latest_commit": "2024-08-20 20:15:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Syntactic Parsing", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "DataPilot/sarashina2.2-3Bx8-moe DataPilot/sarashina2.2-3Bx8-moe は、sbintuitions/sarashina2.2-3b-instruct-v0.1をベースに、mergekit-moeを用いて8つの専門モデルを統合したMixture of Experts(MoE)型の言語モデルです。", "url": "https://huggingface.co/DataPilot/sarashina2.2-3Bx8-moe", "project_name": "sarashina2.2-3Bx8-moe", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2025-03-08 15:52:20", "latest_commit": "2025-03-08 16:10:39", "languages": [], "model_or_dataset": "model", "model_size": 16.6, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "Superswallow-70b-v0.1 Known Performance Issues Two potential bugs have been found in this model: NEED repetition_penalty NEED high temperature Reference: Japanese LLM benchmark results at Nejumi LLM Leaderboad Neo", "url": "https://huggingface.co/nitky/Superswallow-70b-v0.1", "project_name": "Superswallow-70b-v0.1", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2024-01-12 03:54:16", "latest_commit": "2024-01-20 18:18:09", "languages": [], "model_or_dataset": "model", "model_size": 69.2, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Multilinguality" ] }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-base-beta-70B-GPTQ", "project_name": "japanese-stablelm-base-beta-70B-GPTQ", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2023-11-06 11:33:47", "latest_commit": "2023-11-06 16:00:08", "languages": [], "model_or_dataset": "model", "model_size": 9.1, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "SFCOCO Stable Diffusion Model Card SFCOCO Stable Diffusion is a Japanese-specific latent text-to-image diffusion model capable of generating photo-realistic images given any text input.", "url": "https://huggingface.co/nu-dialogue/sfc2022-stable-diffusion", "project_name": "sfc2022-stable-diffusion", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2022-12-18 04:50:47", "latest_commit": "2022-12-18 07:20:46", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "Barba Barba is a multilingual natural language inference model for textual entailment and zero-shot text classification, available as an end-to-end service through TensorFlow Serving.", "url": "https://huggingface.co/hyperonym/barba", "project_name": "barba", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2023-04-29 06:27:12", "latest_commit": "2023-04-29 13:45:12", "languages": [], "model_or_dataset": "model", "model_size": null, 
"model_architectures": "XLMRobertaForSequenceClassification", "multi_labels": [ "Multilinguality", "Textual Inference", "Language Models", "Semantic Text Processing" ] }, { "description": "doshisha-mil/llama-2-70b-chat-4bit-japanese-v1", "url": "https://huggingface.co/doshisha-mil/llama-2-70b-chat-4bit-japanese-v1", "project_name": "llama-2-70b-chat-4bit-japanese-v1", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2023-08-03 03:21:13", "latest_commit": "2023-08-07 04:25:55", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "SpiralAI Spiral-RetNet-3b-base We have conducted pre-training from scratch on the RetNet (https://arxiv.org/abs/2307.08621)", "url": "https://huggingface.co/Spiral-AI/Spiral-RetNet-3b-base", "project_name": "Spiral-RetNet-3b-base", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2024-04-30 09:33:26", "latest_commit": "2024-05-01 04:54:26", "languages": [], "model_or_dataset": "model", "model_size": 2.86, "model_architectures": "RetNetForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "DavidAU/alpaca-guanaco-japanese-gpt-1b-Q8_0-GGUF", "url": "https://huggingface.co/DavidAU/alpaca-guanaco-japanese-gpt-1b-Q8_0-GGUF", "project_name": "alpaca-guanaco-japanese-gpt-1b-Q8_0-GGUF", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2024-04-20 08:49:14", "latest_commit": "2024-04-20 08:49:19", "languages": [], "model_or_dataset": "model", "model_size": 1.39, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Assistance のGGUF版 Our Models for GGUF Vecteus-GGUF Ninja-v1-GGUF Ninja-v1-NSFW-GGUF Ninja-v1-128k-GGUF Ninja-v1-NSFW-128k-GGUF", "url": "https://huggingface.co/Local-Novel-LLM-project/Assistance-GGUF", "project_name": "Assistance-GGUF", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2024-05-03 04:03:47", "latest_commit": "2024-05-03 04:30:45", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "XML-RoBERTa-NER-Japanese This model is a fine-tuned version of xlm-roberta-base on the Wikipedia Japanese NER dataset from Stockmark Inc.", "url": "https://huggingface.co/ithattieu/XML-RoBERTa-NER-Japanese", "project_name": "XML-RoBERTa-NER-Japanese", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2024-08-17 08:18:04", "latest_commit": "2024-08-18 04:03:33", "languages": [], "model_or_dataset": "model", "model_size": 0.277, "model_architectures": "RobertaForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Llama-3-EZO-VLM-1 Based on SakanaAI/Llama-3-EvoVLM-JP-v2, it has been enhanced for Japanese usage through additional pre-training and instruction tuning.", "url": "https://huggingface.co/AXCXEPT/Llama-3-EZO-VLM-1", "project_name": "Llama-3-EZO-VLM-1", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2024-08-03 17:15:09", "latest_commit": "2024-08-23 10:55:53", "languages": [], "model_or_dataset": "model", "model_size": 8.48, "model_architectures": 
"LlavaForConditionalGeneration", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "gpt2-small-japanese-ud-causal Model Description", "url": "https://huggingface.co/KoichiYasuoka/gpt2-small-japanese-ud-causal", "project_name": "gpt2-small-japanese-ud-causal", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2024-08-22 13:33:02", "latest_commit": "2024-08-25 17:54:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "275.86Mのmixtralを日本語データセットでpretrainingしたものです sample from transformers import AutoTokenizer, AutoModelForCausalLM model = AutoModelForCausalLM.from_pretrained(\"if001/tiny_mixtral_ja\")", "url": "https://huggingface.co/hibikaze/tiny_mixtral_ja_with_tokenizer", "project_name": "tiny_mixtral_ja_with_tokenizer", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2024-07-20 05:30:59", "latest_commit": "2024-07-20 05:33:38", "languages": [], "model_or_dataset": "model", "model_size": 0.276, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Lurunchik/WikiHowNFQAを日本語に翻訳し、人手でクリーニングしたデータセットです。", "url": "https://huggingface.co/datasets/GENIAC-Team-Ozaki/WikiHowNFQA-ja_cleaned", "project_name": "WikiHowNFQA-ja_cleaned", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2024-05-10 23:31:18", "latest_commit": "2024-05-10 23:34:09", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces" ] }, { "description": "データ制作者(t_w)", "url": "https://huggingface.co/datasets/tzmtwtr/tw-posts-japanese-v2", "project_name": "tw-posts-japanese-v2", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2023-06-23 14:09:41", "latest_commit": "2023-06-23 19:53:02", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "This pre-training dataset was created for shisa-base-7b-v1.", "url": "https://huggingface.co/datasets/augmxnt/shisa-pretrain-en-ja-v1", "project_name": "shisa-pretrain-en-ja-v1", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2023-11-19 09:48:04", "latest_commit": "2023-12-05 20:08:51", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "calm3-22bを使って簡単な日本語の例文を作成したデータセットです。 ", "url": "https://huggingface.co/datasets/if001/elementray_m", "project_name": "elementray_m", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2024-09-25 11:22:49", "latest_commit": "2024-09-28 08:59:49", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "This repository contains the dataset used for the TaCo paper.", "url": "https://huggingface.co/datasets/saillab/alpaca-japanese-cleaned", 
"project_name": "alpaca-japanese-cleaned", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2024-09-20 23:21:58", "latest_commit": "2024-09-20 23:32:52", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Responsible & Trustworthy NLP", "Cross-Lingual Transfer", "Low-Resource NLP", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Dataset Summary This dataset is a Japanese-translated subset of the NuminaMath CoT dataset, containing the first 100k samples from the original dataset.", "url": "https://huggingface.co/datasets/Inoichan/NuminaMath-CoT-JA-100K", "project_name": "NuminaMath-CoT-JA-100K", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2025-01-05 05:30:50", "latest_commit": "2025-01-05 17:22:46", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Reasoning", "Annotation and Dataset Development" ] }, { "description": "Magpie-Tanuki-Instruction-Selected-Evolved-26.5k 概要 以下の手順で作成した約2万6500件の日本語の合成instructionデータセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Magpie-Tanuki-Instruction-Selected-Evolved-26.5k", "project_name": "Magpie-Tanuki-Instruction-Selected-Evolved-26.5k", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2024-11-29 08:10:32", "latest_commit": "2024-12-15 06:10:27", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "以下の条件に同意したうえで、公開されたモデル及びデータセット等(以下「本コンテンツ」)といいます)をダウンロードします。 ", "url": "https://huggingface.co/datasets/weblab-GENIAC/Open-Platypus-Japanese-masked", "project_name": "Open-Platypus-Japanese-masked", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Quiz Works様に掲載のクイズのうち、2024年8月4日~8月5日時点において取得可能だったクイズを収載したデータセットです。 ", "url": "https://huggingface.co/datasets/hpprc/quiz-works", "project_name": "quiz-works", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2024-08-04 05:57:46", "latest_commit": "2024-08-04 21:25:11", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "概要 このデータセットは日本語LLMの評価用としてよく用いられるelyza/ELYZA-tasks-100について人間が回答を行った結果です。", "url": "https://huggingface.co/datasets/YukiTomita-CC/ELYZA-tasks-100_Human_solved", "project_name": "ELYZA-tasks-100_Human_solved", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2024-07-15 01:33:31", "latest_commit": "2024-07-17 03:55:33", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "概要 reazon-research/reazonspeech-v2[all]をspeechMOSにて音声品質の分析を行った結果です。 ", "url": "https://huggingface.co/datasets/ayousanz/reazon-speech-v2-all-speechMOS-analyze", "project_name": "reazon-speech-v2-all-speechMOS-analyze", "downloads": 19, "source": "Hugging 
Face", "score": -0.051970071335008576, "first_commit": "2024-04-16 06:13:47", "latest_commit": "2024-04-16 16:59:16", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "JAQKET から CC-BY-SA のデータのみを含めたデータセット AI王 公式配布データセット(JAQKET) で配布されているクイズデータのうち、ライセンスが CC-BY-SA-4.0のデータのみを含めたデータセットです。 ", "url": "https://huggingface.co/datasets/hotchpotch/jaqket_cc", "project_name": "jaqket_cc", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2024-02-20 09:35:00", "latest_commit": "2024-02-20 09:43:45", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "fungi_trait_circus_database大菌輪「Trait Circus」データセット(統制形質)最終更新日:2023/12/29 Languages Japanese and English Please do not use this dataset for academic purposes for the time being. ", "url": "https://huggingface.co/datasets/Atsushi/fungi_trait_circus_database", "project_name": "fungi_trait_circus_database", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2022-02-23 11:19:31", "latest_commit": "2023-12-29 06:22:46", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "国立国会図書館の書誌データから作成した振り仮名のデータセット A dataset of furigana characters created from bibliographic data from the National Diet Library.", "url": "https://huggingface.co/datasets/AlienKevin/ndlbib-furigana", "project_name": "ndlbib-furigana", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2024-06-23 06:07:38", "latest_commit": "2024-06-23 10:08:55", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "chatbot-arena-ja-calm2-7b-chatからpromptが一致するデータを削除したデータセットです。", "url": "https://huggingface.co/datasets/GENIAC-Team-Ozaki/chatbot-arena-ja-calm2-7b-chat-experimental_deduped", "project_name": "chatbot-arena-ja-calm2-7b-chat-experimental_deduped", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": "2024-05-02 08:30:14", "latest_commit": "2024-05-02 08:31:34", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "お~いお茶新俳句大賞受賞作品データセット 221の俳句が含まれ、うち200前後は作者と審査員のコメントが付属。 ", "url": "https://huggingface.co/datasets/p1atdev/oiocha", "project_name": "oiocha", "downloads": 19, "source": "Hugging Face", "score": -0.051970071335008576, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "roberta-large-japanese-aozora-char Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-large-japanese-aozora-char", "project_name": "roberta-large-japanese-aozora-char", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2021-12-30 14:19:53", "latest_commit": "2022-06-22 10:22:43", "languages": [], "model_or_dataset": "model", 
"model_size": null, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "bert-large-japanese-char-extended Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-large-japanese-char-extended", "project_name": "bert-large-japanese-char-extended", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2021-06-04 13:29:34", "latest_commit": "2024-08-20 17:45:37", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "ELECTRA small Japanese generator This is a ELECTRA model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/electra-small-japanese-generator", "project_name": "electra-small-japanese-generator", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2021-10-04 13:43:37", "latest_commit": "2023-10-21 13:21:28", "languages": [], "model_or_dataset": "model", "model_size": 0.013800000000000002, "model_architectures": "ElectraForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Wav2Vec2-Large-XLSR-53-Japanese Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the Common Voice, and JSUT dataset{s}.", "url": "https://huggingface.co/qqpann/w2v_hf_jsut_xlsr53", "project_name": "w2v_hf_jsut_xlsr53", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2021-03-29 06:38:38", "latest_commit": "2021-04-01 23:49:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ "Representation Learning", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "このモデルはdeberta-v2-large-japaneseをファインチューニングして固有表現抽出(NER)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/deberta-v2-large-japanese-finetuned-ner", "project_name": "deberta-v2-large-japanese-finetuned-ner", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2023-05-10 13:22:23", "latest_commit": "2023-07-21 14:10:02", "languages": [], "model_or_dataset": "model", "model_size": 0.339, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Named Entity Recognition" ] }, { "description": "japanese-gpt2-medium-unidic This is a medium-sized Japanese GPT-2 model using BERT-like tokenizer.", "url": "https://huggingface.co/okazaki-lab/japanese-gpt2-medium-unidic", "project_name": "japanese-gpt2-medium-unidic", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2023-02-27 05:42:22", "latest_commit": "2023-03-22 06:22:32", "languages": [], "model_or_dataset": "model", "model_size": 0.362, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "deberta-small-japanese-aozora Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-small-japanese-aozora", "project_name": "deberta-small-japanese-aozora", "downloads": 18, 
"source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2022-05-23 04:58:53", "latest_commit": "2023-01-15 15:25:14", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "Manga OCR Optical character recognition for Japanese text, with the main focus being Japanese manga.", "url": "https://huggingface.co/TeamFnord/manga-ocr", "project_name": "manga-ocr", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2022-01-15 17:39:06", "latest_commit": "2022-02-10 07:50:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "VisionEncoderDecoderModel", "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "回答と回答が出てくるパラグラフを与えると質問文を生成するモデル SEE: https://github.com/sonoisa/deep-question-generation 本モデルの作成ステップ概要 SQuAD 1.1を日本語に機械翻訳し、不正なデータをクレンジング(有効なデータは約半分)。", "url": "https://huggingface.co/sonoisa/t5-base-japanese-question-generation", "project_name": "t5-base-japanese-question-generation", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2021-04-03 14:08:55", "latest_commit": "2022-03-11 02:50:33", "languages": [], "model_or_dataset": "model", "model_size": 0.223, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Dialogue Response Generation", "Text Style Transfer", "Text Generation" ] }, { "description": "zenz-v2.5-small zenz-v2.5はかな漢字変換タスクに特化したGPT-2アーキテクチャの条件付き言語モデルです。", "url": "https://huggingface.co/Miwa-Keita/zenz-v2.5-small", "project_name": "zenz-v2.5-small", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2025-01-13 07:46:48", "latest_commit": "2025-01-13 16:09:27", "languages": [], "model_or_dataset": "model", "model_size": 0.0905, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "deberta-large-japanese-unidic-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-unidic-ud-head", "project_name": "deberta-large-japanese-unidic-ud-head", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2022-06-19 00:10:56", "latest_commit": "2023-11-05 17:51:08", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForQuestionAnswering", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Semantic Text Processing" ] }, { "description": "modernBERTでNERにチャレンジ ラベルのマッピング label_list = [\"O\", \"B-人名\", \"I-人名\", \"B-法人名\", \"I-法人名\", \"B-政治的組織名\", \"I-政治的組織名\", \"B-その他の組織名\", \"I-その他の組織名\", \"B-地名\", \"I-地名\", \"B-施設名\", \"I-施設名\", \"B-製品名\", \"I-製品名\", \"B-イベント名\", \"I-イベント名\"] tokenizer 以下を参考にしてください。 ", "url": "https://huggingface.co/Chottokun/modernBERT_japanese_30m_ner_wikipedia", "project_name": "modernBERT_japanese_30m_ner_wikipedia", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2025-02-28 12:42:23", "latest_commit": "2025-03-21 09:54:15", "languages": [], "model_or_dataset": "model", "model_size": 0.132, "model_architectures": "ModernBertForTokenClassification", "multi_labels": [ "Information Extraction & Text 
Mining", "Syntactic Text Processing", "Text Segmentation", "Information Retrieval", "Text Classification" ] }, { "description": "This model is a merged version of qwen-14b-vntl and Qwen1.5-14B-Chat , aiming for the translation of Japanese context into Chinese.", "url": "https://huggingface.co/GralchemOz/Qwen1.5-14B-vntl-jp2zh-4.5bpw-h6-exl2", "project_name": "Qwen1.5-14B-vntl-jp2zh-4.5bpw-h6-exl2", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2024-03-03 02:29:43", "latest_commit": "2024-03-03 03:17:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models" ] }, { "description": "ELYZA-japanese-Llama-2-fast-MoE-2x7B-v0.1 English description here 概要 Llama-2ベースの学習済み日本語モデルであるelyza/ELYZA-japanese-Llama-2-7b-fastと、そのinstruction tuningモデルであるelyza/ELYZA-japanese-Llama-2-7b-fast-instruct を、mergekitを使ってMoEを行い作成したモデルです。 ", "url": "https://huggingface.co/Aratako/ELYZA-japanese-Llama-2-fast-MoE-2x7B-v0.1", "project_name": "ELYZA-japanese-Llama-2-fast-MoE-2x7B-v0.1", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2024-03-06 15:52:13", "latest_commit": "2024-03-19 02:29:56", "languages": [], "model_or_dataset": "model", "model_size": 11.2, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Multilinguality", "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "TaCOMET_ja", "url": "https://huggingface.co/nlp-waseda/tacomet-gpt2-xl-japanese", "project_name": "tacomet-gpt2-xl-japanese", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2024-03-07 11:50:15", "latest_commit": "2024-06-05 09:41:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Reasoning" ] }, { "description": "ドミニオン日本語LLM for Whisper(2023/12/19 1.0版) 概要 Whisperでドミニオン(ボードゲーム)のカード用語などを含んだ音声を文字起こし出来ることを目標にチューニングされたLLMです。 ", "url": "https://huggingface.co/ketman/whisper_for_dominion", "project_name": "whisper_for_dominion", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2023-12-08 13:55:30", "latest_commit": "2023-12-19 09:11:59", "languages": [], "model_or_dataset": "model", "model_size": 1.54, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [ "Language Models" ] }, { "description": "Swallow-MX-8x7b-NVE-v0.1に対し、 Mixtral-8x7B-Instruct-v0.1とMixtral-8x7B-v0.1の差分をマージしたモデルです。 ", "url": "https://huggingface.co/aixsatoshi/Swallow-MX-8x7b-NVE-chatvector-Mixtral-instruct-v2", "project_name": "Swallow-MX-8x7b-NVE-chatvector-Mixtral-instruct-v2", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2024-03-22 08:26:53", "latest_commit": "2024-03-23 04:17:50", "languages": [], "model_or_dataset": "model", "model_size": 46.7, "model_architectures": "MixtralForCausalLM", "multi_labels": [] }, { "description": "Made using Gpt-Small from scratch for learning purpose.", "url": "https://huggingface.co/tirthadagr8/Japanese_to_english_gpt2CasualLM_GemmaTokenizer", "project_name": "Japanese_to_english_gpt2CasualLM_GemmaTokenizer", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2024-11-16 08:21:48", 
"latest_commit": "2025-01-07 18:20:35", "languages": [], "model_or_dataset": "model", "model_size": 0.28200000000000003, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "RakutenAI-7B-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/RakutenAI-7B-upos", "project_name": "RakutenAI-7B-upos", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2024-07-11 06:42:57", "latest_commit": "2024-08-20 17:20:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MistralForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Ruri-Reranker: Japanese General Reranker Usage Direct Usage (Sentence Transformers)", "url": "https://huggingface.co/cl-nagoya/ruri-reranker-stage1-small", "project_name": "ruri-reranker-stage1-small", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2024-08-19 09:44:00", "latest_commit": "2024-09-04 08:53:02", "languages": [], "model_or_dataset": "model", "model_size": 0.06870000000000001, "model_architectures": "DistilBertForSequenceClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "DSR1D-Llama-8B-aya-ja-1k-generated これはdeepseek-ai/DeepSeek-R1-Distill-Llama-8Bを用いて、weblab-GENIAC/aya-ja-evol-instruct-calm3-dpo-maskedの最初の1000件の応答をmax_new_tokens=3060で生成させました。 ", "url": "https://huggingface.co/datasets/kurogane/DSR1D-Llama-8B-aya-ja-1k-generated", "project_name": "DSR1D-Llama-8B-aya-ja-1k-generated", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2025-01-22 13:41:18", "latest_commit": "2025-01-22 13:53:22", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Data created manually", "url": "https://huggingface.co/datasets/tomo1222/Japanese-QA111dataset", "project_name": "Japanese-QA111dataset", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2024-12-14 13:54:53", "latest_commit": "2024-12-14 14:07:10", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Tagging", "Morphology" ] }, { "description": "Corrected MT-Bench-ja Inflection AIによるCorrected MT-Benchの日本語訳です。 ", "url": "https://huggingface.co/datasets/karakuri-ai/corrected-mt-bench-ja", "project_name": "corrected-mt-bench-ja", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2024-07-11 06:25:14", "latest_commit": "2024-07-11 12:47:19", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation" ] }, { "description": "概要 Common Voice Corpus 17.0をspeechMOSにて音声品質の分析を行った結果です。 ", "url": "https://huggingface.co/datasets/ayousanz/common-voice-speechMOS-analyze", "project_name": "common-voice-speechMOS-analyze", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2024-05-29 15:34:35", "latest_commit": "2024-05-29 15:55:23", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, 
"multi_labels": [ "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Kaidan Nihonbunka: A Journey Through Hyakumonogatari's Ghostly Tales Welcome to the Kaidan Nihonbunka Dataset About Name kaidan Nihonbunka translates to 怪談日本文化 in Japanese: 怪談 (Kwaidan): Ghost story or supernatural tale.", "url": "https://huggingface.co/datasets/mohamed-khalil/KaidanNihonbunka", "project_name": "KaidanNihonbunka", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2024-04-15 15:43:41", "latest_commit": "2024-04-15 16:03:13", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Generation", "Machine Translation" ] }, { "description": "Japanese-Vietnamese Translated Sentence Pairs.", "url": "https://huggingface.co/datasets/dichmau/ja_vi_translation", "project_name": "ja_vi_translation", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2024-03-13 17:52:50", "latest_commit": "2024-04-08 19:35:06", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "mmarcoデータセットのquery--passageのペアについて、queryをkeyとして重複を削除したデータセットです。 ", "url": "https://huggingface.co/datasets/hpprc/mmarco-ja", "project_name": "mmarco-ja", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2024-04-07 04:17:03", "latest_commit": "2024-11-12 03:20:22", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "東方トカマクラブ データセット 概要 このデータセットは、東方Projectのトカマクラブに関する情報を収集したものです。", "url": "https://huggingface.co/datasets/MakiAi/Tokama_Club_QA", "project_name": "Tokama_Club_QA", "downloads": 18, "source": "Hugging Face", "score": -0.05197717046070309, "first_commit": "2023-12-20 15:37:51", "latest_commit": "2023-12-20 15:46:02", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "This model is a voice clone of myself created specifically for Style Bert VITS2.", "url": "https://huggingface.co/ThePioneer/MyVoiceClone-Style-Bert-VITS2", "project_name": "MyVoiceClone-Style-Bert-VITS2", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2024-02-29 19:34:12", "latest_commit": "2024-03-04 10:43:27", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality" ] }, { "description": "llm-jp-3-980m-instruct2 LLM-jp-3 is the series of large language models developed by the Research and Development Center for Large Language Models at the National Institute of Informatics.", "url": "https://huggingface.co/llm-jp/llm-jp-3-980m-instruct2", "project_name": "llm-jp-3-980m-instruct2", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2025-01-27 06:18:33", "latest_commit": "2025-02-04 04:57:12", "languages": [], "model_or_dataset": "model", "model_size": 0.99, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", 
"Semantic Text Processing" ] }, { "description": "This model is a fine-tuned version of facebook/wav2vec2-xls-r-1b on the MOZILLA-FOUNDATION/COMMON_VOICE_8_0 - JA dataset.", "url": "https://huggingface.co/AndrewMcDowell/wav2vec2-xls-r-1b-japanese-hiragana-katakana", "project_name": "wav2vec2-xls-r-1b-japanese-hiragana-katakana", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2022-02-04 11:27:09", "latest_commit": "2022-03-24 11:56:32", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "roberta-large-japanese-aozora Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-large-japanese-aozora", "project_name": "roberta-large-japanese-aozora", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2021-12-26 13:08:52", "latest_commit": "2022-10-15 14:22:11", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "roberta-base-japanese-aozora Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-base-japanese-aozora", "project_name": "roberta-base-japanese-aozora", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2021-12-21 00:04:03", "latest_commit": "2022-10-15 14:20:11", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "ELECTRA small Japanese generator This is a ELECTRA model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/electra-small-paper-japanese-generator", "project_name": "electra-small-paper-japanese-generator", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2021-10-04 13:47:24", "latest_commit": "2023-10-21 13:21:31", "languages": [], "model_or_dataset": "model", "model_size": 0.00491, "model_architectures": "ElectraForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "roberta-large-japanese-aozora-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-large-japanese-aozora-ud-goeswith", "project_name": "roberta-large-japanese-aozora-ud-goeswith", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2022-10-15 04:15:39", "latest_commit": "2024-08-20 18:51:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "distilhubert-ft-japanese-50k Fine-tuned (more precisely, continue trained)", "url": "https://huggingface.co/TylorShine/distilhubert-ft-japanese-50k", "project_name": "distilhubert-ft-japanese-50k", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2023-04-20 17:51:47", "latest_commit": "2023-04-21 
18:00:04", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "HubertModel", "multi_labels": [ "Representation Learning", "Language Models", "Green & Sustainable NLP", "Semantic Text Processing" ] }, { "description": "Heron GIT Japanese ELYZA Llama 2 Fast 7B Model Details Heron GIT Japanese ELYZA Llama 2 Fast 7B is a vision-language model that can converse about input images.", "url": "https://huggingface.co/turing-motors/heron-chat-git-ELYZA-fast-7b-v0", "project_name": "heron-chat-git-ELYZA-fast-7b-v0", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2023-09-06 09:04:40", "latest_commit": "2023-09-11 16:56:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GitLlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "spekulatius マージしているとたまに出てくる「目的の意図とは違うのだけどなんだか消すにはもったいないモデル」をおすそ分けするシリーズです。 ", "url": "https://huggingface.co/Lasorco/spekulatius", "project_name": "spekulatius", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2023-10-24 13:56:21", "latest_commit": "2023-10-26 04:21:35", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "What is this model?", "url": "https://huggingface.co/Yokohide031/rust_cl-tohoku_bert-large-japanese", "project_name": "rust_cl-tohoku_bert-large-japanese", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2022-03-13 02:12:20", "latest_commit": "2022-03-13 14:53:03", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Responsible & Trustworthy NLP", "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "記事本文からタイトルを生成するモデル SEE: https://qiita.com/sonoisa/items/a9af64ff641f0bbfed44", "url": "https://huggingface.co/sonoisa/t5-base-japanese-title-generation", "project_name": "t5-base-japanese-title-generation", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2021-04-04 06:57:18", "latest_commit": "2022-02-21 13:38:09", "languages": [], "model_or_dataset": "model", "model_size": 0.223, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "FuseO1-DeepSeekR1-QwQ-SkyT1-Flash-Japanese-32B-Preview 💡 This model was created based on FuseO1-DeepSeekR1-QwQ-SkyT1-Flash-32B-Preview.yaml", "url": "https://huggingface.co/nitky/FuseO1-DeepSeekR1-QwQ-SkyT1-Flash-Japanese-32B-Preview", "project_name": "FuseO1-DeepSeekR1-QwQ-SkyT1-Flash-Japanese-32B-Preview", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2025-02-10 00:47:01", "latest_commit": "2025-02-10 01:37:23", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Text Generation" ] }, { "description": "llm-jp-13b-instruct-lora-jaster-v1.0", "url": "https://huggingface.co/llm-jp/llm-jp-13b-instruct-lora-jaster-v1.0", "project_name": "llm-jp-13b-instruct-lora-jaster-v1.0", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2023-10-18 18:53:58", 
"latest_commit": "2023-10-20 08:41:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Llama-3.1-Swallow-8B-v0.2-reasoningvector-deepseek-r1 DeepSeekの蒸留モデルから推論能力を抽出した重みの差分を、日本語モデルにマージしたモデルです。", "url": "https://huggingface.co/HachiML/Llama-3.1-Swallow-8B-v0.2-reasoningvector-deepseek-r1", "project_name": "Llama-3.1-Swallow-8B-v0.2-reasoningvector-deepseek-r1", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2025-02-18 13:27:11", "latest_commit": "2025-02-18 15:31:33", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Reasoning", "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "SlaughterHouse Exp 7B Model Description", "url": "https://huggingface.co/Elizezen/SlaughterHouse-exp-7B", "project_name": "SlaughterHouse-exp-7B", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2025-01-27 07:01:08", "latest_commit": "2025-01-27 07:45:40", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "ebisuke/liz-nojaloli-ja License MIT Licenseベースとしてrinna/japanese-gpt-neox-3.6bを使用しています。 ", "url": "https://huggingface.co/ebisuke/liz-nojaloli-ja", "project_name": "liz-nojaloli-ja", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2023-05-23 16:59:22", "latest_commit": "2023-05-30 16:01:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "roberta-base-japanese-juman-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-base-japanese-juman-ud-goeswith", "project_name": "roberta-base-japanese-juman-ud-goeswith", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2023-02-21 03:43:52", "latest_commit": "2024-08-30 14:46:25", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/sosoai/Orion-14B-Chat-safetensors", "project_name": "Orion-14B-Chat-safetensors", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2024-01-24 23:44:38", "latest_commit": "2024-01-25 02:08:59", "languages": [], "model_or_dataset": "model", "model_size": 14.5, "model_architectures": "OrionForCausalLM", "multi_labels": [ "Multilinguality", "Language Models" ] }, { "description": "ELYZA-japanese-Llama-2-MoE-2x13B-v0.1 English description here 概要 Llama-2ベースの学習済み日本語モデルであるelyza/ELYZA-japanese-Llama-2-13bと、そのinstruction tuningモデルであるelyza/ELYZA-japanese-Llama-2-13b-instruct を、mergekitを使ってMoEを行い作成したモデルです。 ", "url": "https://huggingface.co/Aratako/ELYZA-japanese-Llama-2-MoE-2x13B-v0.1", "project_name": 
"ELYZA-japanese-Llama-2-MoE-2x13B-v0.1", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2024-03-03 09:25:37", "latest_commit": "2024-03-19 02:34:53", "languages": [], "model_or_dataset": "model", "model_size": 21.5, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Multilinguality", "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "karasu-lora-jp-qa-chat karasu fine tuned model by lora method with the original Q&A dataset.", "url": "https://huggingface.co/aipib/karasu-lora-jp-qa-chat", "project_name": "karasu-lora-jp-qa-chat", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2024-04-24 02:26:58", "latest_commit": "2024-06-03 01:02:33", "languages": [], "model_or_dataset": "model", "model_size": 1.1, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Phos 7B 「どうかお慈悲を もう 疲れ果てました」 生成例 [太字以降がAI生成] 「どうか」 ”それ”は懇願した。 ", "url": "https://huggingface.co/Elizezen/Phos-7B", "project_name": "Phos-7B", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2024-04-24 02:11:32", "latest_commit": "2024-04-24 07:32:31", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Example ESPnet2 TTS model kan-bayashi/jsut_conformer_fastspeech2 ♻", "url": "https://huggingface.co/espnet/kan-bayashi_jsut_conformer_fastspeech2", "project_name": "kan-bayashi_jsut_conformer_fastspeech2", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2021-07-03 14:46:06", "latest_commit": "2021-07-03 10:46:10", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "rinna-gpt-neox-small-japanese-ud-causal Model Description", "url": "https://huggingface.co/KoichiYasuoka/rinna-gpt-neox-small-japanese-ud-causal", "project_name": "rinna-gpt-neox-small-japanese-ud-causal", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2024-09-08 01:53:03", "latest_commit": "2024-09-12 22:30:27", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "goldfish-gpt2-japanese-5mb-ud-causal Model Description", "url": "https://huggingface.co/KoichiYasuoka/goldfish-gpt2-japanese-5mb-ud-causal", "project_name": "goldfish-gpt2-japanese-5mb-ud-causal", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2024-09-09 03:34:02", "latest_commit": "2024-09-12 22:36:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Animagine系のモデルをミックスしたVAE内蔵マージモデルです。", "url": "https://huggingface.co/Noginowa/AnimaMixColorXL", "project_name": "AnimaMixColorXL", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2024-08-15 07:15:47", "latest_commit": "2024-10-03 06:27:16", "languages": [], "model_or_dataset": "model", "model_size": null, 
"model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "gpt2-small-japanese-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/gpt2-small-japanese-upos", "project_name": "gpt2-small-japanese-upos", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2024-06-22 22:28:58", "latest_commit": "2024-07-27 07:49:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "シサム語による説明 アイヌ語と日本語の双方向機械翻訳モデルです。 ", "url": "https://huggingface.co/SoMiyagawa/AinuTrans-2.0", "project_name": "AinuTrans-2.0", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Dataset Details For the original NTX dataset, the conversion to the Aya instructions format, or more details, please refer to the full dataset in instruction form (https://huggingface.co/datasets/tellarin-ai/ntx_llm_instructions)", "url": "https://huggingface.co/datasets/tellarin-ai/ntx_llm_inst_japanese", "project_name": "ntx_llm_inst_japanese", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2023-12-20 15:16:59", "latest_commit": "2023-12-20 15:17:41", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "合成日本語指示データセット 概要 このデータセットは、大規模言語モデル(Qwen2.5-32B-instruct)", "url": "https://huggingface.co/datasets/DeL-TaiseiOzaki/magpie-qwen2.5-32B-10K-ja", "project_name": "magpie-qwen2.5-32B-10K-ja", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2024-10-14 22:20:52", "latest_commit": "2024-10-14 22:54:57", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "クイズの杜様に掲載のクイズのうち、2024年8月5日時点において取得可能だったクイズのうち「二次利用許諾レベル」が「フリー」であったものを収載したデータセットです。 ", "url": "https://huggingface.co/datasets/hpprc/quiz-no-mori", "project_name": "quiz-no-mori", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2024-08-05 01:24:25", "latest_commit": "2024-08-05 08:04:34", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing" ] }, { "description": "ApolloCorpus-ja 概要 多言語医療データセットの ApolloCorpus を日本語に自動翻訳した 525k の指示チューニングデータセットになります。", "url": "https://huggingface.co/datasets/kunishou/ApolloCorpus-ja", "project_name": "ApolloCorpus-ja", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2024-03-13 02:08:51", "latest_commit": "2024-03-13 17:55:25", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "概要 大規模言語モデル(LLM)用の固有表現認識データセット(J-NER)のリポジトリです。 ", "url": "https://huggingface.co/datasets/sergicalsix/Japanese_NER_Data_Hub", "project_name": "Japanese_NER_Data_Hub", "downloads": 17, "source": "Hugging Face", "score": 
-0.0519842695863976, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Named Entity Recognition" ] }, { "description": "It is just a dataset of dolly-15k-jp(*1)", "url": "https://huggingface.co/datasets/Coaso/test-dolly-15ja-for-stftrainer", "project_name": "test-dolly-15ja-for-stftrainer", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2023-06-07 01:40:58", "latest_commit": "2023-06-07 02:29:14", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "diet-members-voice-embeddings 日本の国会議員の声を speechbrain/spkrec-ecapa-voxcelebで embedding したデータセットです。", "url": "https://huggingface.co/datasets/yutakobayashi/diet-members-voice-embeddings", "project_name": "diet-members-voice-embeddings", "downloads": 17, "source": "Hugging Face", "score": -0.0519842695863976, "first_commit": "2023-07-28 12:50:41", "latest_commit": "2023-07-28 23:32:52", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Representation Learning", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "studio-ousia/luke-japanese-baseに対して次の変更を加えたモデルです。 ", "url": "https://huggingface.co/uzabase/luke-japanese-wordpiece-base", "project_name": "luke-japanese-wordpiece-base", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2023-08-10 06:04:58", "latest_commit": "2023-11-28 13:35:07", "languages": [], "model_or_dataset": "model", "model_size": 0.28600000000000003, "model_architectures": "LukeForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Ninja-v1-RP GGUF版はこちら/Click here for the GGUF version 概要 This is a merge of pre-trained language models created using mergekit. 
", "url": "https://huggingface.co/Aratako/Ninja-v1-RP", "project_name": "Ninja-v1-RP", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-05-20 13:04:23", "latest_commit": "2024-05-24 15:10:41", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "modernbert-large-japanese-aozora Model Description", "url": "https://huggingface.co/KoichiYasuoka/modernbert-large-japanese-aozora", "project_name": "modernbert-large-japanese-aozora", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2025-01-07 23:32:44", "latest_commit": "2025-02-05 16:04:58", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ModernBertForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese transformer pipeline (bert-base).", "url": "https://huggingface.co/hiroshi-matsuda-rit/ja_gsd_bert_wwm_unidic_lite", "project_name": "ja_gsd_bert_wwm_unidic_lite", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2021-07-08 12:11:06", "latest_commit": "2021-08-11 20:25:04", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Representation Learning", "Named Entity Recognition", "Language Models", "Semantic Text Processing" ] }, { "description": "BERT base Japanese (character-level tokenization with whole word masking, jawiki-20200831)", "url": "https://huggingface.co/hiroshi-matsuda-rit/bert-base-japanese-basic-char-v2", "project_name": "bert-base-japanese-basic-char-v2", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2021-08-04 11:01:49", "latest_commit": "2021-09-23 16:49:50", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "GPT-Neo 1.3B pre-trained model for Japanese Model Description GPT2/GPT3 like model trained on Japanese.corpus.", "url": "https://huggingface.co/yellowback/gpt-neo-japanese-1.3B", "project_name": "gpt-neo-japanese-1.3B", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2021-12-09 08:09:40", "latest_commit": "2021-12-09 17:59:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoForCausalLM", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese GPT2 Lyric Model Model description", "url": "https://huggingface.co/skytnt/gpt2-japanese-lyric-small", "project_name": "gpt2-japanese-lyric-small", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2022-04-21 04:25:18", "latest_commit": "2023-10-23 12:46:36", "languages": [], "model_or_dataset": "model", "model_size": 0.123, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Responsible & Trustworthy NLP", "Language Models", "Semantic Text Processing", "Low-Resource NLP" ] }, { 
"description": "electra-base-cyberbullying This is a BERT Base model for the Japanese language finetuned for automatic cyberbullying detection.", "url": "https://huggingface.co/kit-nlp/bert-base-japanese-basic-char-v2-cyberbullying", "project_name": "bert-base-japanese-basic-char-v2-cyberbullying", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2022-09-08 09:09:39", "latest_commit": "2022-11-01 07:20:52", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Responsible & Trustworthy NLP", "Language Models", "Semantic Text Processing" ] }, { "description": "whisper-large-v2-mix-jp model for CTranslate2 This repository contains the conversion of vumichien/whisper-large-v2-mix-jp to the CTranslate2 model format.", "url": "https://huggingface.co/arc-r/faster-whisper-large-v2-mix-jp", "project_name": "faster-whisper-large-v2-mix-jp", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2023-07-07 05:53:52", "latest_commit": "2023-07-07 17:56:03", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "deberta-base-japanese-juman-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-juman-ud-goeswith", "project_name": "deberta-base-japanese-juman-ud-goeswith", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2023-02-05 06:48:32", "latest_commit": "2023-05-12 01:16:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "roberta-small-hi-char-mlm Model Description", "url": "https://huggingface.co/nakamura196/roberta-small-hi-char-mlm", "project_name": "roberta-small-hi-char-mlm", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2022-07-14 20:34:59", "latest_commit": "2022-07-22 09:10:42", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "deberta-base-japanese-aozora-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-aozora-ud-goeswith", "project_name": "deberta-base-japanese-aozora-ud-goeswith", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2022-10-14 09:43:58", "latest_commit": "2024-08-20 18:52:19", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "このモデルはdeberta-v2-base-japaneseをファインチューニングしてCommonsenseQA(選択式の質問)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/deberta-v2-base-juman-finetuned-commonsenseqa", "project_name": "deberta-v2-base-juman-finetuned-commonsenseqa", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2023-02-03 04:49:19", 
"latest_commit": "2023-05-26 15:07:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMultipleChoice", "multi_labels": [ "Reasoning", "Language Models", "Commonsense Reasoning", "Semantic Text Processing" ] }, { "description": "reazonspeech-espnet-v1 reazonspeech-espnet-v1 is an ESPnet model trained for Japanese automatic speech recognition (ASR).", "url": "https://huggingface.co/reazon-research/reazonspeech-espnet-v1", "project_name": "reazonspeech-espnet-v1", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2023-01-13 07:44:37", "latest_commit": "2023-01-16 16:44:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality" ] }, { "description": "Unihan LM: Coarse-to-Fine Chinese-Japanese Language Model Pretraining with the Unihan Database Model description Chinese and Japanese share many characters with similar surface morphology.", "url": "https://huggingface.co/microsoft/unihanlm-base", "project_name": "unihanlm-base", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2020-09-27 11:23:02", "latest_commit": "2021-09-22 11:00:56", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "XLMModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "ESを書くAI Japanese GPT-2 modelをファインチューニングしました ファインチューニングには、内定者の二万件以上のESを用いました。 ", "url": "https://huggingface.co/huranokuma/es", "project_name": "es", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2022-08-01 14:59:47", "latest_commit": "2022-08-14 05:47:18", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "alpaca-guanaco-japanese-gpt-1b 1.3Bパラメータの日本語GPTモデルを使用した対話AIです。", "url": "https://huggingface.co/inu-ai/alpaca-guanaco-japanese-gpt-1b", "project_name": "alpaca-guanaco-japanese-gpt-1b", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2023-04-12 00:18:29", "latest_commit": "2023-04-13 10:25:48", "languages": [], "model_or_dataset": "model", "model_size": 1.33, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "t5-base-japanese-web-8k (with Byte-fallback, 8K) Description megagonlabs/t5-base-japanese-web-8k is a T5 (Text-to-Text Transfer Transformer) model pre-trained on Japanese web texts.", "url": "https://huggingface.co/megagonlabs/t5-base-japanese-web-8k", "project_name": "t5-base-japanese-web-8k", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2021-09-06 10:13:42", "latest_commit": "2023-07-04 07:05:38", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "dolly-japanese-gpt-1b-clone 概要 rinna社の「japanese-gpt-1b」を、日本語データセット「databricks-dolly-15k-ja」を使用して学習させた推論モデルです。 ", "url": "https://huggingface.co/ce-lery/dolly-japanese-gpt-1b-clone", "project_name": "dolly-japanese-gpt-1b-clone", "downloads": 16, 
"source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2023-05-06 03:16:54", "latest_commit": "2023-05-07 15:47:23", "languages": [], "model_or_dataset": "model", "model_size": 1.33, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "ベースモデル:cl-tohoku/bert-base-japanese-whole-word-masking データセット:tyqiangz/multilingual-sentiments バッチサイズ: 16固定 オプティマイザ: adamw Optunaでハイパーパラメータ探索 学習率スケジュールのタイプ(lr_scheduler_type):", "url": "https://huggingface.co/A-Funakoshi/bert-finetuned-multilingual-sentiments-adamw", "project_name": "bert-finetuned-multilingual-sentiments-adamw", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2023-10-04 22:56:44", "latest_commit": "2023-11-02 11:16:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Multilinguality", "Language Models", "Semantic Text Processing", "Sentiment Analysis" ] }, { "description": "japanese-large-lm-3.6b-instruction-sft-4bit-32g-actorder_False", "url": "https://huggingface.co/line-corporation/japanese-large-lm-3.6b-instruction-sft-4bit-32g-actorder_False", "project_name": "japanese-large-lm-3.6b-instruction-sft-4bit-32g-actorder_False", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2023-09-26 06:15:51", "latest_commit": "2023-09-27 23:56:05", "languages": [], "model_or_dataset": "model", "model_size": 0.861, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "ベースモデル:cl-tohoku/bert-base-japanese-whole-word-masking データセット:llm-book/wrime-sentiment オプティマイザ: adafactor Optunaでハイパーパラメータ探索 学習率スケジュールのタイプ(lr_scheduler_type):", "url": "https://huggingface.co/A-Funakoshi/bert-base-japanese-v3-wrime-v1", "project_name": "bert-base-japanese-v3-wrime-v1", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2023-09-29 10:52:53", "latest_commit": "2023-10-25 22:58:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Swallow-MoE-2x13B-v0.1 English description here 概要 Llama-2ベースの学習済み日本語モデルであるtokyotech-llm/Swallow-13b-instruct-hfと、それを利用したマージモデルであるnitky/Superswallow-13b-v0.2 を、mergekitを使ってMoEを行い作成したモデルです。 ", "url": "https://huggingface.co/Aratako/Swallow-MoE-2x13B-v0.1", "project_name": "Swallow-MoE-2x13B-v0.1", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-03-03 11:03:22", "latest_commit": "2024-03-19 02:33:15", "languages": [], "model_or_dataset": "model", "model_size": 21.6, "model_architectures": "MixtralForCausalLM", "multi_labels": [] }, { "description": "ELYZA-japanese-Llama-2-MoE-2x7B-v0.1 English description here 概要 Llama-2ベースの学習済み日本語モデルであるelyza/ELYZA-japanese-Llama-2-7bと、そのinstruction tuningモデルであるelyza/ELYZA-japanese-Llama-2-7b-instruct を、mergekitを使ってMoEを行い作成したモデルです。 ", "url": "https://huggingface.co/Aratako/ELYZA-japanese-Llama-2-MoE-2x7B-v0.1", "project_name": "ELYZA-japanese-Llama-2-MoE-2x7B-v0.1", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-03-06 11:07:05", "latest_commit": "2024-03-19 02:31:33", "languages": [], "model_or_dataset": "model", 
"model_size": 11.1, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Multilinguality", "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese-LLaMA-2-13B Japanese-LLaMA-2-13Bは基盤モデル、フルモデルです。 ", "url": "https://huggingface.co/owner203/japanese-llama-2-13b", "project_name": "japanese-llama-2-13b", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2023-12-20 05:41:02", "latest_commit": "2023-12-26 11:36:37", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Tanuki-Zero Base model: llm-jp/llm-jp-13b-v1.0 Instruction data: Randomly sampled, 15k Jaster dataset (train) Code is here.", "url": "https://huggingface.co/kanhatakeyama/Tanuki-ZeRo", "project_name": "Tanuki-ZeRo", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-03-29 23:31:35", "latest_commit": "2024-03-30 00:51:03", "languages": [], "model_or_dataset": "model", "model_size": 12.9, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "Mixtral-8x7B-Instruct-v0.1-japanese Mixtral-8x7B-Instruct-v0.1-japaneseはMixtral-8x7B-Instruct-v0.1をベースに日本語の語彙拡張継続事前学習を実施したモデルです。", "url": "https://huggingface.co/abeja/Mixtral-8x7B-Instruct-v0.1-japanese", "project_name": "Mixtral-8x7B-Instruct-v0.1-japanese", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-04-17 02:56:15", "latest_commit": "2024-04-20 09:14:27", "languages": [], "model_or_dataset": "model", "model_size": 46.9, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Mixtral-8x7B-Instruct-v0.1-japanese-alpha Mixtral-8x7B-Instruct-v0.1-japanese-alphaはMixtral-8x7B-Instruct-v0.1をベースに日本語の語彙拡張継続事前学習を実施した学習途中のモデルです。", "url": "https://huggingface.co/abeja/Mixtral-8x7B-Instruct-v0.1-japanese-alpha", "project_name": "Mixtral-8x7B-Instruct-v0.1-japanese-alpha", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-04-16 07:52:55", "latest_commit": "2024-04-20 09:14:43", "languages": [], "model_or_dataset": "model", "model_size": 46.9, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "Oumuamua-7b-RP GGUF版はこちら/Click here for the GGUF version 概要 This is a merge of pre-trained language models created using mergekit. 
", "url": "https://huggingface.co/Aratako/Oumuamua-7b-RP", "project_name": "Oumuamua-7b-RP", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-06-23 12:30:16", "latest_commit": "2024-06-23 17:06:53", "languages": [], "model_or_dataset": "model", "model_size": 7.33, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "japanese-chat-umievo-itr004-7b", "url": "https://huggingface.co/umiyuki/Japanese-Chat-Umievo-itr004-7b", "project_name": "Japanese-Chat-Umievo-itr004-7b", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-05-12 11:48:36", "latest_commit": "2024-05-13 14:08:37", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "deberta-v3-base-japanese-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-v3-base-japanese-ud-goeswith", "project_name": "deberta-v3-base-japanese-ud-goeswith", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-05-21 11:42:12", "latest_commit": "2024-09-12 23:31:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "japanese-gpt-1b This repository provides a 1.3B-parameter Japanese GPT model.", "url": "https://huggingface.co/yohida/yoshida_gpt", "project_name": "yoshida_gpt", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2022-02-04 10:03:54", "latest_commit": "2022-02-04 10:13:45", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "karakuri-midrose-mg モデルの詳細は、こちらです。", "url": "https://huggingface.co/sbtom/karakuri-midroze-mg", "project_name": "karakuri-midroze-mg", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-04-16 17:03:50", "latest_commit": "2024-04-18 16:11:01", "languages": [], "model_or_dataset": "model", "model_size": 69.2, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Ninja-v1-RP-expressive-breadcrumbs GGUF版はこちら/Click here for the GGUF version 概要 This is a merge of pre-trained language models created using mergekit. 
", "url": "https://huggingface.co/Aratako/Ninja-v1-RP-expressive-breadcrumbs", "project_name": "Ninja-v1-RP-expressive-breadcrumbs", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-05-26 06:36:42", "latest_commit": "2024-06-01 11:54:18", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "llm-jp-3-172b-alpha2", "url": "https://huggingface.co/llm-jp/llm-jp-3-172b-alpha2", "project_name": "llm-jp-3-172b-alpha2", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-09-29 16:25:46", "latest_commit": "2024-10-04 03:22:57", "languages": [], "model_or_dataset": "model", "model_size": 172.0, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "gpt2-large-japanese-ud-causal Model Description", "url": "https://huggingface.co/KoichiYasuoka/gpt2-large-japanese-ud-causal", "project_name": "gpt2-large-japanese-ud-causal", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-08-25 00:35:43", "latest_commit": "2024-08-29 17:08:21", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "en-ja-align 日英対訳文対応付けデータ(内山ら, 2003)として公開されている日英対訳文データセットです。 ", "url": "https://huggingface.co/datasets/hpprc/en-ja-align", "project_name": "en-ja-align", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-03-19 06:12:37", "latest_commit": "2024-03-20 09:17:55", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Wikipedia日本語版からのQ&Aの自動生成 Mixtral 8x22bのGGUF(5bit)をベースに、Wikipedia日本語版の記事から、 自動生成コード1 自動生成コード2 を使ってQ&Aを作成しました。 計算には東京工業大学のスーパーコンピュータTSUBAME4.0を利用しました。 注意 回答にハルシネーション等が含まれている可能性があるので、フィルタリングをかける必要があるかもしれません。", "url": "https://huggingface.co/datasets/kanhatakeyama/AutoWikiQA", "project_name": "AutoWikiQA", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-05-08 08:25:16", "latest_commit": "2024-05-09 01:05:15", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "This dataset was created by machine translating \"ViQuAE\" into Japanese.", "url": "https://huggingface.co/datasets/toshi456/ViQuAE-JA", "project_name": "ViQuAE-JA", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-05-19 07:00:01", "latest_commit": "2024-05-19 07:07:49", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Natural Language Interfaces", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "description public RLHF dataset in Japanese the construction of the reward model was reformatted into a classification task Quality of Japanese text is somewhat low arise from the combination of synthetic generated text and machine translation API details reformatted dataset of 
open_preference_v0.1 label 1 stands for chosen sentence label 0 stands for rejected sentence", "url": "https://huggingface.co/datasets/ryota39/open_preference_v0.2", "project_name": "open_preference_v0.2", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-06-19 09:34:01", "latest_commit": "2024-07-04 12:55:23", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Annotation and Dataset Development" ] }, { "description": "以下のデータ源からランダムに抽出した日本語のテキストをもとに、RAG形式のQ&Aを自動生成したものです。 Wikibooks Wikipedia 判例データ instruction datasetとしてではなく、事前学習での利用を想定しています(質疑応答をするための訓練)。 一部の計算には東京工業大学のスーパーコンピュータTSUBAME4.0を利用しました。", "url": "https://huggingface.co/datasets/kanhatakeyama/CreativeCommons-RAG-QA-Mixtral8x22b", "project_name": "CreativeCommons-RAG-QA-Mixtral8x22b", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-07-03 07:54:49", "latest_commit": "2024-07-12 06:43:18", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "PubChem & Wikipedia English-Japanese Paragraph Pair Classification This dataset is a multilingual extension of the PubChem & Wikipedia Paragraphs Pair Classification dataset.", "url": "https://huggingface.co/datasets/BASF-AI/PubChemWikiJAPC", "project_name": "PubChemWikiJAPC", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-12-04 22:35:35", "latest_commit": "2024-12-05 20:29:37", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Information Extraction & Text Mining", "Information Retrieval", "Text Classification", "Annotation and Dataset Development" ] }, { "description": "生成AIの日英専門用語集です。", "url": "https://huggingface.co/datasets/alfredplpl/genai-terminology-en-ja", "project_name": "genai-terminology-en-ja", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2023-11-01 08:01:31", "latest_commit": "2023-11-01 08:05:56", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Generation" ] }, { "description": "https://github.com/anthropics/hh-rlhf の内容のうち、helpful-base内のchosenに記載されている英文をfuguMTで翻訳、うまく翻訳できていないものを除外、修正したものです。", "url": "https://huggingface.co/datasets/nakayama/hh-rlhf-helpful-base-ja", "project_name": "hh-rlhf-helpful-base-ja", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2023-05-26 15:48:51", "latest_commit": "2023-05-26 15:57:12", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "日本語フェイクニュースデータセット 日本語フェイクニュースデータセット を HuggingFace datasets 用に変換。 ", "url": "https://huggingface.co/datasets/p1atdev/fake-news-jp", "project_name": "fake-news-jp", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2023-09-22 12:40:39", "latest_commit": "2023-09-22 12:54:43", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Language Models", "Semantic Text Processing" ] }, { "description": 
"FEDデータセットをGoogle Cloud Translate API v2で日本語化したデータセットです. ", "url": "https://huggingface.co/datasets/yubo0306/fed_ja", "project_name": "fed_ja", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2023-10-29 03:55:00", "latest_commit": "2023-10-29 04:26:57", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "このデータセットは、OpenAI社のGPT-3.5を https://huggingface.co/datasets/kenkensz9/kenkensz9_1242tw2 のデータでファインチューニングした後に、更に独自に収集した人格のあるツイート330でファインチューニングしたモデルでツイートを生成し、 それに対してスコアを付与したモデルです。 ", "url": "https://huggingface.co/datasets/kenkensz9/nareba1691", "project_name": "nareba1691", "downloads": 16, "source": "Hugging Face", "score": -0.05199136871209211, "first_commit": "2024-06-01 02:13:56", "latest_commit": "2024-06-01 02:44:25", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "ELECTRA small Japanese finance generator This is a ELECTRA model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/electra-small-japanese-fin-generator", "project_name": "electra-small-japanese-fin-generator", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2021-10-04 14:07:16", "latest_commit": "2023-10-21 13:21:23", "languages": [], "model_or_dataset": "model", "model_size": 0.013800000000000002, "model_architectures": "ElectraForMaskedLM", "multi_labels": [ "Language Models" ] }, { "description": "Wav2Vec2 Accent Japanese Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese accent dataset When using this model, make sure that your speech input is sampled at 16kHz.", "url": "https://huggingface.co/vumichien/wav2vec2-large-pitch-recognition", "project_name": "wav2vec2-large-pitch-recognition", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2021-07-16 05:27:54", "latest_commit": "2023-02-08 03:15:13", "languages": [], "model_or_dataset": "model", "model_size": 0.315, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ "Representation Learning", "Speech & Audio in NLP", "Semantic Text Processing" ] }, { "description": "This is a Japanese+English sentence-BERT model.", "url": "https://huggingface.co/sonoisa/sentence-bert-base-ja-en-mean-tokens", "project_name": "sentence-bert-base-ja-en-mean-tokens", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2022-05-08 03:05:08", "latest_commit": "2022-05-08 03:29:28", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertModel", "multi_labels": [ "Multilinguality", "Language Models", "Semantic Similarity", "Semantic Text Processing" ] }, { "description": "electra-base-cyberbullying This is an ELECTRA Small model for the Japanese language finetuned for automatic cyberbullying detection.", "url": "https://huggingface.co/kit-nlp/electra-small-japanese-discriminator-cyberbullying", "project_name": "electra-small-japanese-discriminator-cyberbullying", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2022-09-09 02:43:59", "latest_commit": 
"2022-11-01 07:14:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForSequenceClassification", "multi_labels": [] }, { "description": "roberta-long-japanese (jumanpp + sentencepiece, mC4 Japanese)", "url": "https://huggingface.co/megagonlabs/roberta-long-japanese", "project_name": "roberta-long-japanese", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2022-09-04 14:31:06", "latest_commit": "2022-10-04 23:36:27", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "transformer-lm-japanese-0.1b", "url": "https://huggingface.co/fukugawa/transformer-lm-japanese-0.1b", "project_name": "transformer-lm-japanese-0.1b", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2023-07-12 02:11:11", "latest_commit": "2024-06-03 06:17:19", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "TransformerLMForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-gamma-7B-AWQ", "project_name": "japanese-stablelm-instruct-gamma-7B-AWQ", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2023-10-28 19:03:17", "latest_commit": "2023-11-09 18:16:33", "languages": [], "model_or_dataset": "model", "model_size": 1.2, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "お知らせ より回答が適切になるように学習させたモデル、https://huggingface.co/hotchpotch/youri-7b-stf-qa-context-jaqket-jsquad-gptq もあります。 ", "url": "https://huggingface.co/hotchpotch/youri-7b-sft-qa-context-jaqket-awq", "project_name": "youri-7b-sft-qa-context-jaqket-awq", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2023-12-10 08:52:23", "latest_commit": "2024-02-25 06:40:30", "languages": [], "model_or_dataset": "model", "model_size": 1.13, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Natural Language Interfaces", "Language Models", "Semantic Text Processing" ] }, { "description": "Model Card for Japanese DeBERTa V2 base Model description This is a Japanese DeBERTa V2 base model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/ku-nlp/deberta-v2-base-japanese-with-auto-jumanpp", "project_name": "deberta-v2-base-japanese-with-auto-jumanpp", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2023-09-07 06:04:29", "latest_commit": "2023-11-20 06:00:08", "languages": [], "model_or_dataset": "model", "model_size": 0.137, "model_architectures": "DebertaV2ForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese DialoGPT trained with Aozora (ja) 青空文庫のセリフで学習した日本語のDialoGPT Smallです(en) Japanese DialoGPT Small trained on Aozora Bunko.", "url": "https://huggingface.co/akiFQC/japanese-dialogpt-small-aozora", 
"project_name": "japanese-dialogpt-small-aozora", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2023-02-08 13:22:24", "latest_commit": "2023-02-09 00:55:31", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents", "Language Models" ] }, { "description": "記事本文からタイトルを生成するモデル SEE: https://qiita.com/sonoisa/items/30876467ad5a8a81821f", "url": "https://huggingface.co/sonoisa/t5-qiita-title-generation", "project_name": "t5-qiita-title-generation", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2021-10-17 14:46:56", "latest_commit": "2022-02-21 13:39:01", "languages": [], "model_or_dataset": "model", "model_size": 0.223, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [] }, { "description": "日本語T5事前学習済みモデル This is a T5 (Text-to-Text Transfer Transformer) model pretrained on Japanese corpus. ", "url": "https://huggingface.co/sonoisa/t5-base-japanese-mC4-Wikipedia", "project_name": "t5-base-japanese-mC4-Wikipedia", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2021-06-30 12:53:09", "latest_commit": "2021-09-23 18:29:58", "languages": [], "model_or_dataset": "model", "model_size": 0.223, "model_architectures": null, "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Genji-JP 6B Please check our blog post for more details, samples, evaluations and more: Blogpost Model Description Genji-JP 6B is a model finetuned on our Japanese storytelling dataset based on EleutherAI's GPT-J 6B model.", "url": "https://huggingface.co/NovelAI/genji-jp", "project_name": "genji-jp", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2021-11-03 15:07:47", "latest_commit": "2022-08-09 17:36:02", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTJForCausalLM", "multi_labels": [ "Text Generation" ] }, { "description": "roberta-large-japanese-char-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-large-japanese-char-luw-upos", "project_name": "roberta-large-japanese-char-luw-upos", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2021-12-30 15:56:46", "latest_commit": "2022-09-18 19:44:49", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Information Retrieval", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese-Novel-Reward-modernbert-ja-310m このモデルはsbintuitions/modernbert-ja-310mをファインチューニングして作成された日本語小説の品質評価のためのRewardモデルです。 ", "url": "https://huggingface.co/Aratako/Japanese-Novel-Reward-modernbert-ja-310m", "project_name": "Japanese-Novel-Reward-modernbert-ja-310m", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2025-03-03 09:11:36", "latest_commit": "2025-03-04 15:25:10", "languages": [], "model_or_dataset": "model", "model_size": 0.315, "model_architectures": "ModernBertForSequenceClassification", "multi_labels": [ "Text Classification", "Language Models", "Semantic Text 
Processing" ] }, { "description": "Byt5-small-ain-jpn-mt is a machine translation model pretrained with Google's ByT5-small and fine-tuned on bilingual datasets crawled from the Web.", "url": "https://huggingface.co/Language-Media-Lab/byt5-small-ain-jpn-mt", "project_name": "byt5-small-ain-jpn-mt", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2022-01-25 06:37:11", "latest_commit": "2022-02-04 13:03:14", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "Japanese-LLaMA-2-13B-GGUF Japanese-LLaMA-2-13B-GGUFはJapanese-LLaMA-2-13BのGGUF形式です。 ", "url": "https://huggingface.co/owner203/japanese-llama-2-13b-gguf", "project_name": "japanese-llama-2-13b-gguf", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2023-12-20 05:37:09", "latest_commit": "2023-12-26 11:45:15", "languages": [], "model_or_dataset": "model", "model_size": 13.3, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "bert-large-japanese-unidic-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-large-japanese-unidic-luw-upos", "project_name": "bert-large-japanese-unidic-luw-upos", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2022-02-13 01:00:41", "latest_commit": "2023-11-05 18:44:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Representation Learning", "Syntactic Text Processing", "Information Retrieval", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "このモデルはdeberta-v2-base-japaneseをファインチューニングしてCommonsenseQA(選択式の質問)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/deberta-v2-japanese-base-finetuned-commonsenseqa", "project_name": "deberta-v2-japanese-base-finetuned-commonsenseqa", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2023-02-01 01:02:44", "latest_commit": "2023-05-26 15:05:18", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMultipleChoice", "multi_labels": [ "Reasoning", "Language Models", "Commonsense Reasoning", "Semantic Text Processing" ] }, { "description": "Kendamarron/LongWriter-llm-jp-3-3.7b-instruct llm-jp/llm-jp-3-3.7b-instructを長文出力ができるようにSFTしたモデルです。", "url": "https://huggingface.co/Kendamarron/LongWriter-llm-jp-3-3.7b-instruct", "project_name": "LongWriter-llm-jp-3-3.7b-instruct", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-12-11 14:52:39", "latest_commit": "2024-12-11 15:20:01", "languages": [], "model_or_dataset": "model", "model_size": 3.78, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "k-ush/xlm-roberta-base-ance-en-jp-warmup A XLM-RoBERTa-base model trained on mMARCO Japanese dataset with ANCE warmup script.", "url": "https://huggingface.co/k-ush/xlm-roberta-base-ance-en-jp-warmup", "project_name": "xlm-roberta-base-ance-en-jp-warmup", "downloads": 15, "source": "Hugging Face", 
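The byt5-small-ain-jpn-mt record above is an Ainu-to-Japanese translator built on ByT5, which operates on raw bytes. A minimal sketch, assuming the standard seq2seq interface; the input sentence is an illustrative romanized Ainu greeting:

```python
# Hedged sketch: Ainu -> Japanese translation with
# Language-Media-Lab/byt5-small-ain-jpn-mt (a ByT5-small fine-tune).
from transformers import AutoTokenizer, T5ForConditionalGeneration

name = "Language-Media-Lab/byt5-small-ain-jpn-mt"
tokenizer = AutoTokenizer.from_pretrained(name)
model = T5ForConditionalGeneration.from_pretrained(name)

inputs = tokenizer("irankarapte", return_tensors="pt")  # Ainu greeting
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```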
"score": -0.051998467837786624, "first_commit": "2023-02-19 03:00:06", "latest_commit": "2023-02-22 12:04:25", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaDot_NLL_LN", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "INPUT: Japanese name in ROMAJI FORM OUTPUT:", "url": "https://huggingface.co/tarudesu/gendec-with-distilmbert", "project_name": "gendec-with-distilmbert", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2023-11-14 02:12:39", "latest_commit": "2024-03-23 16:49:33", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DistilBertForSequenceClassification", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "JPNsensei-V2 Model Application", "url": "https://huggingface.co/kanxxyc/JPNsensei-V2", "project_name": "JPNsensei-V2", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2023-10-30 00:18:24", "latest_commit": "2024-03-11 10:19:14", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Natural Language Interfaces", "Question Answering", "Language Models" ] }, { "description": "bert-base-sudachitra-v11", "url": "https://huggingface.co/hiroshi-matsuda-rit/bert-base-sudachitra-v11", "project_name": "bert-base-sudachitra-v11", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2023-05-06 11:00:39", "latest_commit": "2024-01-14 16:29:56", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "doc2query/msmarco-japanese-mt5-base-v1 This is a doc2query model based on mT5 (also known as docT5query).", "url": "https://huggingface.co/doc2query/msmarco-japanese-mt5-base-v1", "project_name": "msmarco-japanese-mt5-base-v1", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2022-04-29 12:05:21", "latest_commit": "2022-04-29 14:05:37", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MT5ForConditionalGeneration", "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "friendly_JA-Model (T5 fine-tuned model) MT model trained using the friendly_JA Corpus attempting to make Japanese easier/more accessible to occidental people by using the Latin/English derived katakana lexicon instead of the standard Sino-Japanese lexicon Examples input output 最適化を応用した機械翻訳モデルは高精度だ オプティマイゼーションを応用したマシントランスレーションモデルは高いアキュラシーだ 彼は架空の世界に住んでいる 彼はイマジナリー世界に住んでいる 新型コロナウイルスに感染してしまった コロナウイルスにかかってしまった 深層学習は難しい ディープラーニングはむずかしい 新たな概念を紹介する 新しいコンセプトを紹介する 津波の警報が流れた ツナミのアラートが流れた 南海トラフの災害は震源地による 南海トラフのディザスターはエピ", "url": "https://huggingface.co/astremo/friendly_JA", "project_name": "friendly_JA", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2022-01-10 06:31:18", "latest_commit": "2022-05-22 14:57:21", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models", "Semantic Text Processing" ] }, { 
"description": "Japanese-LLaMA-2-7B-GGUF Japanese-LLaMA-2-7B-GGUFはJapanese-LLaMA-2-7BのGGUF形式です。 ", "url": "https://huggingface.co/owner203/japanese-llama-2-7b-gguf", "project_name": "japanese-llama-2-7b-gguf", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-01-22 03:00:02", "latest_commit": "2024-06-05 02:30:01", "languages": [], "model_or_dataset": "model", "model_size": 6.97, "model_architectures": null, "multi_labels": [] }, { "description": "Llama 3 Youko 70B (rinna/llama-3-youko-70b)", "url": "https://huggingface.co/rinna/llama-3-youko-70b", "project_name": "llama-3-youko-70b", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-07-21 14:13:34", "latest_commit": "2024-07-25 05:16:28", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Model overview This model is the baseline model for awesome-japanese-nlp-classification-dataset.", "url": "https://huggingface.co/taishi-i/awesome-japanese-nlp-classification-model", "project_name": "awesome-japanese-nlp-classification-model", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2023-09-09 09:23:05", "latest_commit": "2023-09-10 00:18:22", "languages": [], "model_or_dataset": "model", "model_size": 0.178, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Information Extraction & Text Mining", "Information Retrieval", "Text Classification" ] }, { "description": "yuyuyui-chatbot", "url": "https://huggingface.co/ushikado/yuyuyui-chatbot", "project_name": "yuyuyui-chatbot", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2021-05-04 14:52:12", "latest_commit": "2021-05-23 13:27:10", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Natural Language Interfaces", "Dialogue Response Generation", "Dialogue Systems & Conversational Agents" ] }, { "description": "SambaLingo-Japanese-Chat SambaLingo-Japanese-Chat is a human aligned chat model trained in Japanese and English.", "url": "https://huggingface.co/LoneStriker/SambaLingo-Japanese-Chat-8.0bpw-h8-exl2", "project_name": "SambaLingo-Japanese-Chat-8.0bpw-h8-exl2", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-03-07 06:57:50", "latest_commit": "2024-03-07 07:00:51", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "VecteusをベースにLLavaに対応させたモデルです。 ", "url": "https://huggingface.co/Local-Novel-LLM-project/Ocuteus-v1", "project_name": "Ocuteus-v1", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-05-07 10:03:15", "latest_commit": "2024-05-10 05:39:04", "languages": [], "model_or_dataset": "model", "model_size": 7.57, "model_architectures": "LlavaMistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Example ESPnet2 TTS model kan-bayashi/jsut_fastspeech2 ♻", "url": "https://huggingface.co/espnet/kan-bayashi_jsut_fastspeech2", "project_name": "kan-bayashi_jsut_fastspeech2", "downloads": 
15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2021-07-03 14:45:57", "latest_commit": "2021-07-03 10:46:00", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Example ESPnet2 TTS model kan-bayashi/jsut_tacotron2 ♻", "url": "https://huggingface.co/espnet/kan-bayashi_jsut_tacotron2", "project_name": "kan-bayashi_jsut_tacotron2", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2021-07-03 14:43:58", "latest_commit": "2021-07-03 10:44:00", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Swallow-MoE-4x7B-lisa 概要 tokyotech-llm/Swallow-7b-hfをベースに、以下の4モデルをgate_mode=randomでMoEし、その後LISAという手法でインストラクションチューニングを施したモデルです。 ", "url": "https://huggingface.co/Aratako/Swallow-MoE-4x7B-lisa", "project_name": "Swallow-MoE-4x7B-lisa", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-04-02 01:20:21", "latest_commit": "2024-04-05 11:35:25", "languages": [], "model_or_dataset": "model", "model_size": 19.8, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "bert-large-japanese-v2-finetuned-wrime", "url": "https://huggingface.co/MuneK/bert-large-japanese-v2-finetuned-jed", "project_name": "bert-large-japanese-v2-finetuned-jed", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2023-09-11 09:37:42", "latest_commit": "2023-11-07 11:47:01", "languages": [], "model_or_dataset": "model", "model_size": 0.337, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Kurage Kurage is a multipurpose RAG model from Lightblue.", "url": "https://huggingface.co/lightblue/kurage-ja", "project_name": "kurage-ja", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-09-11 03:39:10", "latest_commit": "2024-09-16 08:12:19", "languages": [], "model_or_dataset": "model", "model_size": 7.61, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "aashish1904/gemma-2-2b-jpn-it-Q2_K-GGUF", "url": "https://huggingface.co/aashish1904/gemma-2-2b-jpn-it-Q2_K-GGUF", "project_name": "gemma-2-2b-jpn-it-Q2_K-GGUF", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-10-03 13:46:16", "latest_commit": "2024-10-03 13:46:23", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "reazonspeech-espnet-v1 reazonspeech-espnet-v1 is an ESPnet model trained for Japanese automatic speech recognition (ASR).", "url": "https://huggingface.co/Dallyana/EspnetASR", "project_name": "EspnetASR", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-01-26 20:47:19", "latest_commit": "2024-01-26 20:52:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Summary This is an LLaMA 3 Youko 
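The ESPnet2 TTS records above (kan-bayashi/jsut_fastspeech2, kan-bayashi/jsut_tacotron2) can be loaded through the standard ESPnet2 inference interface. A hedged sketch, assuming the espnet and espnet_model_zoo packages are installed:

```python
# Hedged sketch: speech synthesis with the ESPnet2 JSUT models listed above.
# API details follow the standard espnet2 TTS interface; requires espnet_model_zoo.
import soundfile as sf
from espnet2.bin.tts_inference import Text2Speech

tts = Text2Speech.from_pretrained("espnet/kan-bayashi_jsut_fastspeech2")
result = tts("こんにちは、世界。")            # returns a dict including "wav"
sf.write("out.wav", result["wav"].numpy(), tts.fs)  # tts.fs is the sampling rate
```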
qlora, created using a custom version of the VNTL dataset combined with the VNTL-Chat dataset.", "url": "https://huggingface.co/lmg-anon/vntl-llama3-8b-202409-qlora", "project_name": "vntl-llama3-8b-202409-qlora", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-09-24 03:18:52", "latest_commit": "2024-09-25 16:23:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "A very tiny 33.5M Llama3 model trained on a Macbook Pro with M3 Max for 10 hours.", "url": "https://huggingface.co/frost-beta/Llama3-33.5M-Japanese", "project_name": "Llama3-33.5M-Japanese", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-07-16 23:58:54", "latest_commit": "2024-07-17 08:27:07", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "gpt2-medium-japanese-unidic-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/gpt2-medium-japanese-unidic-upos", "project_name": "gpt2-medium-japanese-unidic-upos", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-08-30 04:34:52", "latest_commit": "2024-08-30 14:09:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "JBLiMP This is the data from \"JBLiMP: Japanese Benchmark of Linguistic Minimal Pairs\" (Someya and Oseki, 2023).", "url": "https://huggingface.co/datasets/polm-stability/jblimp", "project_name": "jblimp", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2023-05-29 09:31:31", "latest_commit": "2023-05-29 18:49:16", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "oasst2-135k-jaをチャット形式に変換したデータセットになります。", "url": "https://huggingface.co/datasets/kunishou/oasst2-chat-68k-ja", "project_name": "oasst2-chat-68k-ja", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2023-12-25 13:19:09", "latest_commit": "2023-12-25 13:21:58", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "cosmopedia-100k のindex 20k ~ 100k を日本語に自動翻訳したデータになります(テキストが長すぎて翻訳エラーになったレコードは除外しています)。", "url": "https://huggingface.co/datasets/kunishou/cosmopedia-100k-ja-preview", "project_name": "cosmopedia-100k-ja-preview", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-02-28 07:58:55", "latest_commit": "2024-03-05 23:30:38", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Retrieval", "Indexing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "This dataset was created by machine translating \"nlvr\" into Japanese.", "url": "https://huggingface.co/datasets/toshi456/NLVR-JA", "project_name": "NLVR-JA", "downloads": 15, "source": "Hugging Face", "score": 
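The dataset records in this stretch (oasst2-chat-68k-ja, cosmopedia-100k-ja-preview, jblimp, and similar) can all be inspected the same way with the Hugging Face `datasets` library. A minimal sketch; the "train" split name is an assumption:

```python
# Hedged sketch: a quick look at one of the chat-format datasets listed above.
from datasets import load_dataset

ds = load_dataset("kunishou/oasst2-chat-68k-ja", split="train")
print(ds)     # column names and row count
print(ds[0])  # one chat-format record
```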
-0.051998467837786624, "first_commit": "2024-05-10 12:44:01", "latest_commit": "2024-05-10 12:51:56", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "This dataset is a collection of especially good tweets (described later), selected by the author from their own tweets. ", "url": "https://huggingface.co/datasets/kenkensz9/kenkensz9_1242tw2", "project_name": "kenkensz9_1242tw2", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-06-01 00:05:43", "latest_commit": "2024-06-01 01:38:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "This dataset is a collection of Korean, Chinese, and Japanese OpenOrca translation datasets.", "url": "https://huggingface.co/datasets/werty1248/OpenOrca-EnKoZhJa-18k", "project_name": "OpenOrca-EnKoZhJa-18k", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-08-10 18:54:09", "latest_commit": "2024-08-10 19:16:35", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Representation Learning", "Text Generation", "Machine Translation", "Semantic Text Processing" ] }, { "description": "A dataset consisting of human-written text (OSCAR) and LLM-generated text (GPT-3.5 Turbo), created to evaluate how well LLM-generated Japanese text can be detected. See the code for details: https://github.com/Rio-Rf/Lab-CreateDataset", "url": "https://huggingface.co/datasets/Rio-Rf/oscar_2023_filtered_and_ai_text_filtered", "project_name": "oscar_2023_filtered_and_ai_text_filtered", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-11-02 02:52:51", "latest_commit": "2024-11-02 03:30:47", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "JSEC homepage", "url": "https://huggingface.co/datasets/hpprc/jsec", "project_name": "jsec", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-11-20 12:55:59", "latest_commit": "2024-11-20 13:52:38", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "def prompt(japanese, english):", "url": "https://huggingface.co/datasets/Moleys/Filtered-Japanese-English-Parallel-Corpus", "project_name": "Filtered-Japanese-English-Parallel-Corpus", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Responsible & Trustworthy NLP", "Text Generation", "Machine Translation", "Language Models", "Low-Resource NLP" ] }, { "description": "By downloading the published models, datasets, and other materials (hereinafter referred to as 'the Content'), you agree to the following conditions. ", "url": "https://huggingface.co/datasets/weblab-GENIAC/jwinogrande", "project_name": "jwinogrande", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null,
"multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "埋め込みモデルの学習、評価のためのクラスタリングデータセットです。 ", "url": "https://huggingface.co/datasets/oshizo/ASRClustering-ja", "project_name": "ASRClustering-ja", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-06-22 12:12:34", "latest_commit": "2024-06-23 15:35:03", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Structured Data in NLP", "Annotation and Dataset Development" ] }, { "description": "Dataset details Dataset type:", "url": "https://huggingface.co/datasets/toshi456/LLaVA-JP-Instruct-108K", "project_name": "LLaVA-JP-Instruct-108K", "downloads": 15, "source": "Hugging Face", "score": -0.051998467837786624, "first_commit": "2024-05-12 13:38:21", "latest_commit": "2024-05-12 13:51:45", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Visual Data in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "mlx-community/DeepSeek-R1-Distill-Qwen-32B-Japanese The Model mlx-community/DeepSeek-R1-Distill-Qwen-32B-Japanese was converted to MLX format from cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese using mlx-lm version 0.21.1.", "url": "https://huggingface.co/mlx-community/DeepSeek-R1-Distill-Qwen-32B-Japanese", "project_name": "DeepSeek-R1-Distill-Qwen-32B-Japanese", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2025-01-27 10:39:58", "latest_commit": "2025-01-27 13:43:41", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Responsible & Trustworthy NLP", "Language Models", "Semantic Text Processing", "Multimodality", "Low-Resource NLP" ] }, { "description": "Japanese-Novel-Reward-modernbert-ja-130m このモデルはsbintuitions/modernbert-ja-130mをファインチューニングして作成された日本語小説の品質評価のためのRewardモデルです。 ", "url": "https://huggingface.co/Aratako/Japanese-Novel-Reward-modernbert-ja-130m", "project_name": "Japanese-Novel-Reward-modernbert-ja-130m", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2025-02-25 22:51:14", "latest_commit": "2025-03-04 15:23:36", "languages": [], "model_or_dataset": "model", "model_size": 0.132, "model_architectures": "ModernBertForSequenceClassification", "multi_labels": [ "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "Wav2Vec2-Large-XLSR-53-{language} #TODO: replace language with your {language}, e.g. 
", "url": "https://huggingface.co/qqpann/wav2vec2-large-xlsr-japanese-0325-1200", "project_name": "wav2vec2-large-xlsr-japanese-0325-1200", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2021-03-29 08:22:13", "latest_commit": "2021-03-29 19:26:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ "Representation Learning", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "日本語 gpt2 蒸留モデル このモデルはrinna/japanese-gpt2-meduimを教師として蒸留したものです。 ", "url": "https://huggingface.co/knok/japanese-distilgpt2", "project_name": "japanese-distilgpt2", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2022-04-14 09:32:23", "latest_commit": "2022-04-15 06:00:51", "languages": [], "model_or_dataset": "model", "model_size": 0.116, "model_architectures": null, "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "GPT2 Japanese base model version 2 Prerequisites transformers==4.19.2 Model architecture This model uses GPT2 base setttings except vocabulary size.", "url": "https://huggingface.co/ClassCat/gpt2-base-japanese-v2", "project_name": "gpt2-base-japanese-v2", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2022-06-04 02:30:34", "latest_commit": "2022-06-25 15:36:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Donut (base-sized model, fine-tuned on visual novel like synthetic dataset ) ビジュアルノベル風画像の合成データセットでnaver-clova-ix/donut-baseを訓練したモデルです。 ", "url": "https://huggingface.co/oshizo/donut-base-japanese-visual-novel", "project_name": "donut-base-japanese-visual-novel", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2023-05-03 04:53:49", "latest_commit": "2023-05-03 09:25:19", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "VisionEncoderDecoderModel", "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "japanese-large-lm-1.7b-instruction-sft-8bit-1g-actorder_True", "url": "https://huggingface.co/line-corporation/japanese-large-lm-1.7b-instruction-sft-8bit-1g-actorder_True", "project_name": "japanese-large-lm-1.7b-instruction-sft-8bit-1g-actorder_True", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2023-09-26 06:15:31", "latest_commit": "2023-09-29 03:09:03", "languages": [], "model_or_dataset": "model", "model_size": 0.625, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "日本語でtrainingしたllama2 model size: 130.78M trainingは以下のscript参照 https://github.com/Lightning-AI/lit-gpt/tree/main use from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained(\"if001/sentencepiece_ja\", trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained(\"if001/llama2_ja_ss\")", "url": "https://huggingface.co/if001/llama2_ja_ss", "project_name": "llama2_ja_ss", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2023-10-16 04:12:34", "latest_commit": 
"2023-10-16 13:49:48", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Dialogue Response Generation", "Syntactic Text Processing", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Kokuwa lamettaの改良でマージさせるモデル探しをしていたらKiwiMixという面白そうなモデルを見つけました。 ", "url": "https://huggingface.co/Lasorco/Kokuwa", "project_name": "Kokuwa", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2023-10-24 14:10:27", "latest_commit": "2023-10-26 04:22:46", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese BERT-base (Sudachi + WordPiece) How to load the tokenizer Please download the dictionary file for Sudachi + WordPiece from our GitHub repository.", "url": "https://huggingface.co/hitachi-nlp/bert-base-japanese_sudachi-wordpiece", "project_name": "bert-base-japanese_sudachi-wordpiece", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2023-06-14 07:17:23", "latest_commit": "2023-06-16 01:04:41", "languages": [], "model_or_dataset": "model", "model_size": 0.109, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "ku-accms/bert-base-japanese-ssuw Model description This is a pre-trained Japanese BERT base model for super short unit words (SSUW).", "url": "https://huggingface.co/ku-accms/bert-base-japanese-ssuw", "project_name": "bert-base-japanese-ssuw", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2023-04-11 13:57:30", "latest_commit": "2023-04-12 04:40:42", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "deberta-large-japanese-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-luw-upos", "project_name": "deberta-large-japanese-luw-upos", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2022-05-26 14:52:32", "latest_commit": "2023-01-14 23:15:30", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Information Retrieval", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "deberta-small-japanese-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-small-japanese-upos", "project_name": "deberta-small-japanese-upos", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2022-05-23 23:55:56", "latest_commit": "2024-07-26 15:38:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Information Retrieval", "Syntactic Parsing", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "This is a Japanese sentence-T5 model.", "url": 
"https://huggingface.co/sonoisa/sentence-t5-base-ja-mean-tokens", "project_name": "sentence-t5-base-ja-mean-tokens", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2021-12-27 11:57:10", "latest_commit": "2022-07-31 07:54:13", "languages": [], "model_or_dataset": "model", "model_size": 0.223, "model_architectures": "T5Model", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "roberta-large-japanese-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-large-japanese-luw-upos", "project_name": "roberta-large-japanese-luw-upos", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2021-12-26 13:51:46", "latest_commit": "2024-08-20 18:34:07", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Information Retrieval", "Text Classification", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "Google's mt5-base fine-tuned in Japanese to solve error detection and correction task. ", "url": "https://huggingface.co/kz/mt5base-finetuned-ECC-japanese-small", "project_name": "mt5base-finetuned-ECC-japanese-small", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2021-03-21 19:07:13", "latest_commit": "2022-05-26 13:50:56", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MT5ForConditionalGeneration", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "roberta-large-japanese-aozora-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-large-japanese-aozora-ud-head", "project_name": "roberta-large-japanese-aozora-ud-head", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2022-06-22 00:49:08", "latest_commit": "2024-08-20 19:54:48", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForQuestionAnswering", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Semantic Text Processing" ] }, { "description": "Model Trained Using AutoNLP Problem type: Binary Classification Model ID: 59363 Validation Metrics Loss: 0.12651239335536957 Accuracy: 0.9532079853817648 Precision: 0.9729688278823665 Recall: 0.9744633462616643 AUC: 0.9717333684823413 F1: 0.9737155136027014 Usage You can use cURL to access this model: $ curl -X POST -H \"Authorization: Bearer YOUR_API_KEY\" -H \"Content-Type: application/json\" -d '{\"inputs\": \"I love AutoNLP\"}' https://api-inference.huggingface.co/models/abhishek/autonlp-japanese-sentiment-5936", "url": "https://huggingface.co/abhishek/autonlp-japanese-sentiment-59363", "project_name": "autonlp-japanese-sentiment-59363", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2021-04-21 11:28:24", "latest_commit": "2021-05-18 22:56:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Information Extraction & Text Mining", "Information Retrieval", "Text Classification" ] }, { "description": "bert-large-japanese-luw-upos Model Description", "url": 
"https://huggingface.co/KoichiYasuoka/bert-large-japanese-luw-upos", "project_name": "bert-large-japanese-luw-upos", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2021-10-26 13:54:17", "latest_commit": "2022-09-18 19:43:45", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Representation Learning", "Syntactic Text Processing", "Information Retrieval", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "bert-base-japanese-unidic-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-base-japanese-unidic-luw-upos", "project_name": "bert-base-japanese-unidic-luw-upos", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2022-02-13 01:00:01", "latest_commit": "2023-11-05 18:44:10", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Information Retrieval", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "このモデルは unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit を日本語で微調整したモデルです.", "url": "https://huggingface.co/dahara1/DeepSeek-R1-Distill-Qwen-14B-unsloth-jpn", "project_name": "DeepSeek-R1-Distill-Qwen-14B-unsloth-jpn", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2025-01-23 01:01:58", "latest_commit": "2025-01-25 00:38:30", "languages": [], "model_or_dataset": "model", "model_size": 14.8, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "japanese-soseki-gpt2-1b", "url": "https://huggingface.co/jweb/japanese-soseki-gpt2-1b", "project_name": "japanese-soseki-gpt2-1b", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2022-03-03 04:53:15", "latest_commit": "2023-03-27 12:09:04", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "COMET-GPT2 ja v2 Finetuned GPT-2 xl on the large version of ATOMIC ja using a causal language modeling (CLM) objective.", "url": "https://huggingface.co/nlp-waseda/comet-gpt2-xl-japanese", "project_name": "comet-gpt2-xl-japanese", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2023-09-26 13:37:52", "latest_commit": "2024-03-11 04:16:02", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Dialogue Response Generation", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "DeepSeek-V3-slice-jp64 実験モデルです 本モデルは DeepSeek-V3 をベースに、日本語の例文を元に頻出する MoE (Mixture of Experts) の各レイヤーごとのexpertsを厳選して再構成したモデルです。 ", "url": "https://huggingface.co/mmnga/DeepSeek-V3-slice-jp64", "project_name": "DeepSeek-V3-slice-jp64", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2025-01-01 15:50:29", "latest_commit": "2025-01-01 16:51:36", "languages": [], "model_or_dataset": "model", 
"model_size": 181.0, "model_architectures": "DeepseekV3ForCausalLM", "multi_labels": [] }, { "description": "bart-large-japanese This model is converted from the original Japanese BART Pretrained model released by Kyoto University.", "url": "https://huggingface.co/Formzu/bart-large-japanese", "project_name": "bart-large-japanese", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2022-10-31 06:53:19", "latest_commit": "2022-11-07 12:06:32", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MBartForConditionalGeneration", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "sonoisa/t5-base-japaneseをファインチューニングして、タイトル生成に用いれるようにしたモデルです。 ", "url": "https://huggingface.co/Mizuiro-sakura/t5-CAMERA-title-generation", "project_name": "t5-CAMERA-title-generation", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2023-03-21 10:49:27", "latest_commit": "2023-07-21 14:11:13", "languages": [], "model_or_dataset": "model", "model_size": 0.223, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "VITS TTS Japanese Only Sakura Miko こちらは「さくらみこ」の音声データセットに基づいて学習されたVITS-TTSモデルです。 ", "url": "https://huggingface.co/Lycoris53/Vits-TTS-Japanese-Only-Sakura-Miko", "project_name": "Vits-TTS-Japanese-Only-Sakura-Miko", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2023-07-01 16:55:49", "latest_commit": "2023-08-29 03:05:42", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality" ] }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-beta-70B-GPTQ", "project_name": "japanese-stablelm-instruct-beta-70B-GPTQ", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2023-11-02 15:45:24", "latest_commit": "2023-11-02 20:04:07", "languages": [], "model_or_dataset": "model", "model_size": 9.1, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "Model Card Summary This model was trained using H2O LLM Studio.", "url": "https://huggingface.co/yukismd/JapaneseQuizChatbot_v1", "project_name": "JapaneseQuizChatbot_v1", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2023-06-08 00:25:01", "latest_commit": "2023-06-08 00:48:50", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "japanese-large-lm-3.6b-instruction-sft-4bit-128g-actorder_False", "url": "https://huggingface.co/line-corporation/japanese-large-lm-3.6b-instruction-sft-4bit-128g-actorder_False", "project_name": "japanese-large-lm-3.6b-instruction-sft-4bit-128g-actorder_False", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2023-09-26 06:16:04", "latest_commit": "2023-09-27 23:54:44", "languages": [], "model_or_dataset": "model", "model_size": 0.771, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Language Models" ] }, { 
"description": "📄 ライセンス / License 修正 CreativeML OpenRAIL-M ライセンス / Modified CreativeML OpenRAIL-M license このモデルのクレジットを入れずに使用する Use the model without crediting the creator このモデルで生成した画像を商用利用する Sell images they generate このモデルを商用の画像生成サービスで利用する Run on services that generate images for money このモデルを使用したマージモデルを共有する Share merges using this model このモデル、またはこのモデルをマージしたモデルを販売する Sell this model or merges using this model このモデルをマージしたモデルに異なる権限を設定する Have different permissions when sharing merges", "url": "https://huggingface.co/natsusakiyomi/AsagaoMix", "project_name": "AsagaoMix", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2023-07-29 03:42:53", "latest_commit": "2023-08-25 07:32:49", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese BERT-base (Nothing + Unigram)", "url": "https://huggingface.co/hitachi-nlp/bert-base-japanese_nothing-unigram", "project_name": "bert-base-japanese_nothing-unigram", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2023-06-14 08:07:28", "latest_commit": "2023-06-16 01:07:11", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "このモデルはluke-japanese-baseをファインチューニングして、JNLI(文章の関係性判別)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-base-finetuned-jnli", "project_name": "luke-japanese-base-finetuned-jnli", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2023-02-11 18:39:14", "latest_commit": "2023-07-21 14:09:44", "languages": [], "model_or_dataset": "model", "model_size": 0.279, "model_architectures": "LukeForSequenceClassification", "multi_labels": [ "Reasoning", "Textual Inference", "Language Models", "Semantic Text Processing" ] }, { "description": "##llm-jpのインストラクトモデル", "url": "https://huggingface.co/DeL-TaiseiOzaki/Tengentoppa-llm-jp-3.7B-reasoning-instruct", "project_name": "Tengentoppa-llm-jp-3.7B-reasoning-instruct", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2024-12-09 13:14:44", "latest_commit": "2024-12-11 16:00:29", "languages": [], "model_or_dataset": "model", "model_size": 3.48, "model_architectures": "LlamaModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese Stable LM Instruct Gamma 7B +", "url": "https://huggingface.co/ohwi/japanese-stablelm-instruct-gamma-7b-dpo-uf-v1", "project_name": "japanese-stablelm-instruct-gamma-7b-dpo-uf-v1", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2024-03-09 16:47:29", "latest_commit": "2024-03-21 14:33:07", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models" ] }, { "description": "Shisa 7B Shisa 7B (shisa-7b-v1)", "url": "https://huggingface.co/LoneStriker/shisa-7b-v1-4.0bpw-h6-exl2", "project_name": "shisa-7b-v1-4.0bpw-h6-exl2", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2023-12-07 
17:59:51", "latest_commit": "2023-12-07 18:54:26", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Multilinguality", "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "Introduction Who am I: Qishen Ha", "url": "https://huggingface.co/haqishen/h2o-Llama-3-8B-Japanese-Instruct", "project_name": "h2o-Llama-3-8B-Japanese-Instruct", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2024-04-24 07:48:45", "latest_commit": "2024-06-24 08:57:49", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "kotoba-whisper-v2.0-mlx This repository contains a converted mlx-whisper model of kotoba-whisper-v2.0 which is suitable for running with Apple Silicon.", "url": "https://huggingface.co/kaiinui/kotoba-whisper-v2.0-mlx", "project_name": "kotoba-whisper-v2.0-mlx", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2024-09-18 14:36:47", "latest_commit": "2024-09-18 14:44:07", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese Dummy Tokenizer Repository containing a dummy Japanese Tokenizer trained on snow_simplified_japanese_corpus dataset.", "url": "https://huggingface.co/ybelkada/japanese-dummy-tokenizer", "project_name": "japanese-dummy-tokenizer", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2022-04-06 12:31:37", "latest_commit": "2022-07-11 08:24:32", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "karakuri-midrose-CV Details of the model are here. ", "url": "https://huggingface.co/sbtom/karakuri-midrose-CV", "project_name": "karakuri-midrose-CV", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2024-04-16 17:05:06", "latest_commit": "2024-04-17 00:35:06", "languages": [], "model_or_dataset": "model", "model_size": 69.2, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Summary This is a Gemma 2 Baku lora, created using the VNTL 3.1 dataset.", "url": "https://huggingface.co/lmg-anon/vntl-gemma2-2b-lora", "project_name": "vntl-gemma2-2b-lora", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2024-10-26 23:34:38", "latest_commit": "2024-10-27 00:33:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "A pretrained Japanese TTS model intended for use in VITS-JaPros-WebUI.", "url": "https://huggingface.co/litagin/vits-japros-pretrained", "project_name": "vits-japros-pretrained", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2023-09-30 00:16:22", "latest_commit": "2023-10-11 09:55:47", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Oumuamua-7b-base This is a merge of
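The kotoba-whisper-v2.0-mlx record above is an Apple Silicon conversion for the mlx-whisper package. A hedged transcription sketch; the audio path is a hypothetical placeholder:

```python
# Hedged sketch: Japanese ASR with the MLX conversion of kotoba-whisper,
# via the mlx-whisper package (Apple Silicon only).
import mlx_whisper

result = mlx_whisper.transcribe(
    "speech.wav",  # hypothetical input file
    path_or_hf_repo="kaiinui/kotoba-whisper-v2.0-mlx",
)
print(result["text"])
```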
pre-trained language models created using mergekit. ", "url": "https://huggingface.co/nitky/Oumuamua-7b-base", "project_name": "Oumuamua-7b-base", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2024-06-01 10:39:53", "latest_commit": "2024-06-01 15:31:15", "languages": [], "model_or_dataset": "model", "model_size": 7.33, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "rinna-gpt2-medium-japanese-ud-causal Model Description", "url": "https://huggingface.co/KoichiYasuoka/rinna-gpt2-medium-japanese-ud-causal", "project_name": "rinna-gpt2-medium-japanese-ud-causal", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2024-09-07 07:54:18", "latest_commit": "2024-09-12 22:28:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Llama-3-Nymeria-ELYZA-8B Experimental merge between a Llama 3 model that has had continued pre-training with Japanese data and a regular RP model to see how well it keeps its Japanese capability and RP capability.", "url": "https://huggingface.co/mpasila/Llama-3-Nymeria-ELYZA-8B", "project_name": "Llama-3-Nymeria-ELYZA-8B", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2024-07-17 15:02:32", "latest_commit": "2024-07-17 15:11:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "A dataset of questions for searching for local dishes, annotated with the search keywords contained in each question. There are four types of named entities. ", "url": "https://huggingface.co/datasets/wolf4032/token-classification-japanese-search-local-cuisine", "project_name": "token-classification-japanese-search-local-cuisine", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2024-04-28 07:26:52", "latest_commit": "2024-05-12 07:19:14", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "A dataset of simple Japanese example sentences created using calm3-22b. A larger version is at https://huggingface.co/datasets/if001/elementray_m ", "url": "https://huggingface.co/datasets/if001/elementray_small", "project_name": "elementray_small", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2024-09-21 22:30:12", "latest_commit": "2024-09-28 08:08:24", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "LogicJa Dataset Card Overview LogicJa is a multi-turn benchmark designed to assess the reasoning capabilities of Japanese language models across multiple domains.", "url": "https://huggingface.co/datasets/sionic-ai/LogicJa", "project_name": "LogicJa", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2025-02-19 02:34:06", "latest_commit": "2025-02-19 07:33:20", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Reasoning" ] }, { "description": "KanjiVG PNG images
with textual descriptions This dataset is an adaptation of KanjiVG by Ulrich Apel.", "url": "https://huggingface.co/datasets/davidstap/kanji_definitions", "project_name": "kanji_definitions", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2025-01-21 10:51:12", "latest_commit": "2025-01-21 11:16:25", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Visual Data in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Jamp: Controlled Japanese Temporal Inference Dataset for Evaluating Generalization Capacity of Language Models Jamp(tomo-vv/temporalNLI_dataset)", "url": "https://huggingface.co/datasets/zenless-lab/jamp", "project_name": "jamp", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2024-12-18 10:09:27", "latest_commit": "2024-12-18 14:30:20", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "By downloading the published models, datasets, and other materials (hereinafter referred to as 'the Content'), you agree to the following conditions. ", "url": "https://huggingface.co/datasets/weblab-GENIAC/jbbh", "project_name": "jbbh", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "By downloading the published models, datasets, and other materials (hereinafter referred to as 'the Content'), you agree to the following conditions. ", "url": "https://huggingface.co/datasets/weblab-GENIAC/jarc", "project_name": "jarc", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "This dataset is based on the Japanese version of Wikipedia dataset and converted into a multi-turn conversation format using llama2Pro8B.
", "url": "https://huggingface.co/datasets/shi3z/ja_conv_wikipedia_llama2pro8b_10k", "project_name": "ja_conv_wikipedia_llama2pro8b_10k", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2024-01-12 06:17:36", "latest_commit": "2024-01-12 06:18:48", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Dialogue Systems & Conversational Agents", "Annotation and Dataset Development" ] }, { "description": "データセットについて Kendamarron/jimba-instuction-1k-betaのinstructionのうち200個をより単純なタスクに書き換えたデータセットです。 ", "url": "https://huggingface.co/datasets/Kendamarron/jimba-instruction-simplify-200", "project_name": "jimba-instruction-simplify-200", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": "2024-03-31 12:14:17", "latest_commit": "2024-04-01 04:34:41", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "To avoid leaking the dataset to LLM training data, it is not distributed on the open web.", "url": "https://huggingface.co/datasets/naist-nlp/multils-japanese", "project_name": "multils-japanese", "downloads": 14, "source": "Hugging Face", "score": -0.05200556696348113, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Annotation and Dataset Development" ] }, { "description": "Model Card for Model ID Model Details Model Description", "url": "https://huggingface.co/flypg/DeepSeek-R1-Distill-Qwen-14B-Japanese-chat", "project_name": "DeepSeek-R1-Distill-Qwen-14B-Japanese-chat", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2025-02-09 04:41:45", "latest_commit": "2025-02-20 03:59:11", "languages": [], "model_or_dataset": "model", "model_size": 14.8, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Dialogue Systems & Conversational Agents" ] }, { "description": "Japanese ELECTRA-small We provide a Japanese ELECTRA-Small model, as described in ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators.", "url": "https://huggingface.co/cinmodel/electra-small-japanese-generator", "project_name": "electra-small-japanese-generator", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2020-11-13 06:49:52", "latest_commit": "2020-12-11 22:26:17", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models" ] }, { "description": "roberta-base-japanese-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-base-japanese-luw-upos", "project_name": "roberta-base-japanese-luw-upos", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2021-12-21 00:41:00", "latest_commit": "2022-09-18 19:44:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Information Retrieval", "Text Classification", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "ELECTRA small 
Japanese finance generator This is an ELECTRA model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/electra-small-paper-japanese-fin-generator", "project_name": "electra-small-paper-japanese-fin-generator", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2021-10-04 13:38:47", "latest_commit": "2023-10-21 13:21:24", "languages": [], "model_or_dataset": "model", "model_size": 0.00491, "model_architectures": "ElectraForMaskedLM", "multi_labels": [ "Language Models" ] }, { "description": "electra-base-cyberbullying This is an ELECTRA Base model for the Japanese language finetuned for automatic cyberbullying detection.", "url": "https://huggingface.co/kit-nlp/transformers-ud-japanese-electra-base-discriminator-cyberbullying", "project_name": "transformers-ud-japanese-electra-base-discriminator-cyberbullying", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2022-09-09 04:08:15", "latest_commit": "2022-11-01 07:18:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForSequenceClassification", "multi_labels": [ "Responsible & Trustworthy NLP" ] }, { "description": "このモデルはluke-japanese-large-liteをファインチューニングして、Question-Answeringに用いられるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-large-finetuned-QA", "project_name": "luke-japanese-large-finetuned-QA", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-01-17 09:07:23", "latest_commit": "2023-05-04 14:19:28", "languages": [], "model_or_dataset": "model", "model_size": 0.413, "model_architectures": "LukeForQuestionAnswering", "multi_labels": [ "Natural Language Interfaces", "Question Answering", "Language Models", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "Wav2Vec2-XLS-R-300M-Japanese-Hiragana Fine-tuned facebook/wav2vec2-xls-r-300m on Japanese Hiragana characters using JSUT, JVS, Common Voice, and in-house dataset.", "url": "https://huggingface.co/snu-nia-12/wav2vec2-xls-r-300m_nia12_phone-hiragana_japanese", "project_name": "wav2vec2-xls-r-300m_nia12_phone-hiragana_japanese", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-01-11 14:58:40", "latest_commit": "2023-01-11 15:19:42", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ "Representation Learning", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "japanese-reversed-gpt2-medium-unidic This is a medium-sized Japanese reversed GPT-2 model using a BERT-like tokenizer.", "url": "https://huggingface.co/okazaki-lab/japanese-reversed-gpt2-medium-unidic", "project_name": "japanese-reversed-gpt2-medium-unidic", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-03-08 16:54:44", "latest_commit": "2023-03-16 06:18:23", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "このモデルはcl-tohoku/bert-large-japanese-v2をファインチューニングして、固有表現抽出(NER)に用いられるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/bert-large-japanese-v2-finetuned-ner", "project_name": 
"bert-large-japanese-v2-finetuned-ner", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-05-26 09:38:08", "latest_commit": "2023-07-21 14:10:18", "languages": [], "model_or_dataset": "model", "model_size": 0.336, "model_architectures": "BertForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Named Entity Recognition", "Language Models", "Semantic Text Processing" ] }, { "description": "MPT-7B-inst このモデルは、MosaicMLのllm-foundryリポジトリを使用してmosaicml/mpt-7b-instructをファインチューニングしたモデルです。 ", "url": "https://huggingface.co/Jumtra/mpt-7b-inst", "project_name": "mpt-7b-inst", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-05-24 14:22:33", "latest_commit": "2023-06-26 01:09:06", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MPTForCausalLM", "multi_labels": [] }, { "description": "VITS TTS Japanese Only Amitaro VITS TTS model finetuned using free voice data from amitaro free voice here あみたろの声素材工房 Finetuning code is from Plachtaa - VITS Fast Fine-tuning See sample usage Lycoris53/VITS-TTS-Japanese-Only-Amitaro Model Details 76 annotated wav file train for 600 epoch 日本語の説明などこちらに AiThinkso.net Developed by:", "url": "https://huggingface.co/Lycoris53/Vits-TTS-Japanese-Only-Amitaro", "project_name": "Vits-TTS-Japanese-Only-Amitaro", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-06-28 14:02:12", "latest_commit": "2023-07-01 16:54:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality" ] }, { "description": "About This model is Lightblue's QLoRA finetune of OpenOrca's Open-Orca/OpenOrcaxOpenChat-Preview2-13B model on Japanese fine-tuning datasets.", "url": "https://huggingface.co/lightblue/openorca_stx", "project_name": "openorca_stx", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-09-12 09:29:10", "latest_commit": "2023-10-02 10:25:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Natural Language Interfaces", "Question Answering" ] }, { "description": "llm-jp-13b-instruct-lora-jaster-dolly-oasst-v1.0", "url": "https://huggingface.co/llm-jp/llm-jp-13b-instruct-lora-jaster-dolly-oasst-v1.0", "project_name": "llm-jp-13b-instruct-lora-jaster-dolly-oasst-v1.0", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-10-18 19:01:48", "latest_commit": "2023-10-20 08:41:17", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "つくよみちゃんデータセットを用いて calm-2-7b-chat をファインチューニングしたモデルです。", "url": "https://huggingface.co/offtoung/tsukuyomi-chan-calm2-7b", "project_name": "tsukuyomi-chan-calm2-7b", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-12-21 08:46:37", "latest_commit": "2023-12-27 04:07:20", "languages": [], "model_or_dataset": "model", "model_size": 7.01, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Sentiment Analysis", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese_Fine_Tuned_Whisper_Model This model is a 
fine-tuned version of openai/whisper-tiny on the Common Voice dataset.", "url": "https://huggingface.co/Nikolajvestergaard/Japanese_Fine_Tuned_Whisper_Model", "project_name": "Japanese_Fine_Tuned_Whisper_Model", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-03-14 15:32:35", "latest_commit": "2023-03-15 09:23:21", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [ "Language Models", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "Electra Base Japanese Irony", "url": "https://huggingface.co/kit-nlp/transformers-ud-japanese-electra-base-discriminator-irony", "project_name": "transformers-ud-japanese-electra-base-discriminator-irony", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2022-11-07 07:55:57", "latest_commit": "2023-06-09 06:50:49", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForSequenceClassification", "multi_labels": [ "Stylistic Analysis", "Sentiment Analysis" ] }, { "description": "bert-base-irony", "url": "https://huggingface.co/kit-nlp/bert-base-japanese-basic-char-v2-irony", "project_name": "bert-base-japanese-basic-char-v2-irony", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2022-11-07 07:33:23", "latest_commit": "2022-11-08 00:10:26", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Language Models", "Stylistic Analysis", "Semantic Text Processing", "Sentiment Analysis" ] }, { "description": "deberta-large-japanese-wikipedia-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-wikipedia-ud-goeswith", "project_name": "deberta-large-japanese-wikipedia-ud-goeswith", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2022-09-18 08:41:06", "latest_commit": "2023-05-12 01:29:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "Details: https://spacy.io/models/ja#ja_core_news_md Japanese pipeline optimized for CPU.", "url": "https://huggingface.co/spacy/ja_core_news_md", "project_name": "ja_core_news_md", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2021-07-07 12:10:08", "latest_commit": "2023-10-10 06:45:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Representation Learning", "Syntactic Text Processing", "Named Entity Recognition", "Semantic Text Processing" ] }, { "description": "概要 質問と応答から、その過程の思考を生成する言語モデルです。", "url": "https://huggingface.co/SousiOmine/Kuroiso-CR-7B-20250124", "project_name": "Kuroiso-CR-7B-20250124", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2025-01-24 16:57:47", "latest_commit": "2025-01-25 04:20:17", "languages": [], "model_or_dataset": "model", "model_size": 7.62, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Natural 
Language Interfaces", "Question Answering", "Annotation and Dataset Development" ] }, { "description": "roberta-small-japanese-char-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-small-japanese-char-luw-upos", "project_name": "roberta-small-japanese-char-luw-upos", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2021-12-23 02:47:23", "latest_commit": "2024-08-20 18:36:17", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Information Retrieval", "Text Classification", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "deberta-small-japanese-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-small-japanese-luw-upos", "project_name": "deberta-small-japanese-luw-upos", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2022-05-24 03:52:45", "latest_commit": "2024-08-20 17:28:44", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Information Retrieval", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "deberta-base-japanese-wikipedia-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-wikipedia-luw-upos", "project_name": "deberta-base-japanese-wikipedia-luw-upos", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2022-06-25 06:28:11", "latest_commit": "2024-08-20 17:53:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "bert-large-japanese-wikipedia-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-large-japanese-wikipedia-ud-head", "project_name": "bert-large-japanese-wikipedia-ud-head", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2022-06-21 07:38:19", "latest_commit": "2024-08-20 19:45:52", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForQuestionAnswering", "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese-Alpaca-2-13B-GGUF Japanese-Alpaca-2-13B-GGUFはJapanese-Alpaca-2-13BのGGUF形式です。 ", "url": "https://huggingface.co/owner203/japanese-alpaca-2-13b-gguf", "project_name": "japanese-alpaca-2-13b-gguf", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-12-20 10:56:08", "latest_commit": "2023-12-26 11:46:41", "languages": [], "model_or_dataset": "model", "model_size": 13.3, "model_architectures": null, "multi_labels": [ "Multilinguality" ] }, { "description": "isekai-bert-v1", "url": "https://huggingface.co/isek-ai/isekai-bert-v1", "project_name": "isekai-bert-v1", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-10-17 08:52:01", 
"latest_commit": "2023-10-17 13:14:13", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "deberta-large-japanese-aozora-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-aozora-ud-goeswith", "project_name": "deberta-large-japanese-aozora-ud-goeswith", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2022-10-14 11:35:36", "latest_commit": "2024-08-20 19:20:44", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "deberta-base-japanese-unidic-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-unidic-ud-head", "project_name": "deberta-base-japanese-unidic-ud-head", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2022-06-18 10:20:24", "latest_commit": "2024-08-20 20:09:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForQuestionAnswering", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Semantic Text Processing" ] }, { "description": "deberta-large-japanese-unidic-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-unidic-luw-upos", "project_name": "deberta-large-japanese-unidic-luw-upos", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2022-06-10 12:53:45", "latest_commit": "2024-08-20 20:16:18", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Syntactic Parsing", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "ELECTRA base Japanese discriminator This is a ELECTRA model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/electra-base-japanese-discriminator", "project_name": "electra-base-japanese-discriminator", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2021-11-15 17:39:41", "latest_commit": "2022-12-09 00:43:19", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForPreTraining", "multi_labels": [ "Language Models" ] }, { "description": "ELYZA-japanese-CodeLlama-7b Model Description ELYZA-japanese-CodeLlama-7b は、 Code Llamaをベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", "url": "https://huggingface.co/elyza/ELYZA-japanese-CodeLlama-7b", "project_name": "ELYZA-japanese-CodeLlama-7b", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-11-07 12:48:15", "latest_commit": "2023-11-15 00:38:12", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1 
japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1 is a merge of the following models: mistralai/Mistral-7B-Instruct-v0.1 stabilityai/japanese-stablelm-instruct-gamma-7b 🧩 Configuration slices: - sources: - model: mistralai/Mistral-7B-Instruct-v0.1 layer_range:", "url": "https://huggingface.co/MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1", "project_name": "japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-01-16 12:23:01", "latest_commit": "2024-01-16 12:27:54", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "deberta-large-japanese-juman-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-juman-ud-goeswith", "project_name": "deberta-large-japanese-juman-ud-goeswith", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-02-05 13:24:47", "latest_commit": "2024-08-30 14:27:11", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "Japanese BERT-base (MeCab + Unigram)", "url": "https://huggingface.co/hitachi-nlp/bert-base-japanese_mecab-unigram", "project_name": "bert-base-japanese_mecab-unigram", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-06-14 06:58:47", "latest_commit": "2023-06-16 01:01:25", "languages": [], "model_or_dataset": "model", "model_size": 0.109, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "This model learned the proceedings of the Japanese parliament in 2022.", "url": "https://huggingface.co/ohtaman/falcon-7b-kokkai2022-lora", "project_name": "falcon-7b-kokkai2022-lora", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-07-14 20:05:55", "latest_commit": "2023-09-20 16:36:19", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining" ] }, { "description": "deberta-large-japanese-wikipedia-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-wikipedia-luw-upos", "project_name": "deberta-large-japanese-wikipedia-luw-upos", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2022-07-06 03:15:12", "latest_commit": "2024-08-20 17:54:58", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "MPT-7B-base このモデルは、MosaicMLのllm-foundryリポジトリを使用してmosaicml/mpt-7bをファインチューニングしたモデルです。 ", "url": "https://huggingface.co/Jumtra/mpt-7b-base", "project_name": "mpt-7b-base", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-05-24 14:30:09", "latest_commit": 
"2023-06-26 01:08:31", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MPTForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "このモデルはluke-japanese-base-liteをファインチューニングして、Question-Answeringに用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-base-lite-jsquad", "project_name": "luke-japanese-base-lite-jsquad", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-02-08 00:59:13", "latest_commit": "2023-07-21 14:10:34", "languages": [], "model_or_dataset": "model", "model_size": 0.132, "model_architectures": "LukeForQuestionAnswering", "multi_labels": [ "Natural Language Interfaces", "Question Answering", "Language Models", "Annotation and Dataset Development" ] }, { "description": "アニメ声のようなわざとらしい声でもなく、ボカロなどのソフトを使ったいかにも合成の音声でもなく、クラスに一人くらいいそうな、自然で親しみやすい美少女の声を…。", "url": "https://huggingface.co/ThePioneer/NaturalGirlyVoice", "project_name": "NaturalGirlyVoice", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-03-11 11:52:53", "latest_commit": "2023-03-11 13:44:50", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Only for Japanese Please use AutoTokenizer and AutoModelForCausalLM And must use Unifine format to input and output.", "url": "https://huggingface.co/ganchengguang/USA-7B-instruction-incontext-learning", "project_name": "USA-7B-instruction-incontext-learning", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-09-13 16:15:06", "latest_commit": "2023-11-17 16:44:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Low-Resource NLP", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "japanese-large-lm-1.7b-instruction-sft-4bit-32g-actorder_False", "url": "https://huggingface.co/line-corporation/japanese-large-lm-1.7b-instruction-sft-4bit-32g-actorder_False", "project_name": "japanese-large-lm-1.7b-instruction-sft-4bit-32g-actorder_False", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-09-26 06:14:25", "latest_commit": "2023-09-27 01:23:34", "languages": [], "model_or_dataset": "model", "model_size": 0.487, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Language Models" ] }, { "description": "Description A Japanese-specialized SentencePiece tokenizer trained for AI Novelist's SuperTrin and Damsel 20B models. 
", "url": "https://huggingface.co/naclbit/trin_tokenizer_v3", "project_name": "trin_tokenizer_v3", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-05-22 05:00:42", "latest_commit": "2023-06-23 21:25:37", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "Reproduced Japanese Stable LM Instruct Gamma 7B Model Description", "url": "https://huggingface.co/ohwi/japanese-stablelm-instruct-gamma-7b-repro", "project_name": "japanese-stablelm-instruct-gamma-7b-repro", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-02-06 13:43:50", "latest_commit": "2024-03-21 14:32:50", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Text Generation", "Language Models" ] }, { "description": "Shisa 7B Shisa 7B (shisa-7b-v1)", "url": "https://huggingface.co/LoneStriker/shisa-7b-v1-8.0bpw-h8-exl2", "project_name": "shisa-7b-v1-8.0bpw-h8-exl2", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-12-07 18:22:23", "latest_commit": "2023-12-07 18:54:33", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Multilinguality", "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "Manga OCR Optical character recognition for Japanese text, with the main focus being Japanese manga.", "url": "https://huggingface.co/agiera/manga-ocr-base", "project_name": "manga-ocr-base", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-11-15 00:16:35", "latest_commit": "2023-11-15 03:09:56", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "VisionEncoderDecoderModel", "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-beta-70B-AWQ", "project_name": "japanese-stablelm-instruct-beta-70B-AWQ", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-11-02 15:45:23", "latest_commit": "2023-11-09 18:16:16", "languages": [], "model_or_dataset": "model", "model_size": 9.68, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "モデル説明 (model explanation) MoeDiffusionPlusPlus 0.7 : DreamShaper 3.3 (full) 0.3。 ", "url": "https://huggingface.co/ThePioneer/MoeSharpV1", "project_name": "MoeSharpV1", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-01-21 07:30:06", "latest_commit": "2023-02-03 23:46:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Explainability & Interpretability in NLP" ] }, { "description": "Shisa 7B Shisa 7B (shisa-7b-v1)", "url": "https://huggingface.co/LoneStriker/shisa-7b-v1-3.0bpw-h6-exl2", "project_name": "shisa-7b-v1-3.0bpw-h6-exl2", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-12-07 
17:52:29", "latest_commit": "2023-12-07 18:54:23", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Multilinguality", "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "SambaLingo-Japanese-Chat SambaLingo-Japanese-Chat is a human aligned chat model trained in Japanese and English.", "url": "https://huggingface.co/LoneStriker/SambaLingo-Japanese-Chat-5.0bpw-h6-exl2", "project_name": "SambaLingo-Japanese-Chat-5.0bpw-h6-exl2", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-03-07 06:53:27", "latest_commit": "2024-03-07 06:55:27", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "はじめに GoogleのGemma-2Bを日本語で使えるように継続事前学習を施した、商用利用可能なベースモデルです。 ", "url": "https://huggingface.co/alfredplpl/suzume-poc", "project_name": "suzume-poc", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-03-14 09:51:38", "latest_commit": "2024-03-17 15:05:20", "languages": [], "model_or_dataset": "model", "model_size": 2.51, "model_architectures": "GemmaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Shisa 7B Shisa 7B (shisa-7b-v1)", "url": "https://huggingface.co/LoneStriker/shisa-7b-v1-6.0bpw-h6-exl2", "project_name": "shisa-7b-v1-6.0bpw-h6-exl2", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-12-07 18:14:46", "latest_commit": "2023-12-07 18:58:23", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Multilinguality", "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "Swallow-MS-7b-v0.1 このモデルはtokyotech-llm/Swallow-MS-7b-instruct-v0.1のtokenizer.chat_templateを以下に変更したものです。 ", "url": "https://huggingface.co/HachiML/Swallow-MS-7b-instruct-v0.1", "project_name": "Swallow-MS-7b-instruct-v0.1", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-05-05 01:27:10", "latest_commit": "2024-05-05 15:21:25", "languages": [], "model_or_dataset": "model", "model_size": 7.33, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "モデル ベースモデル:microsoft/Phi-3-mini-4k-instruct 学習データセット:llm-jp/hh-rlhf-12k-ja 学習方式:フルパラメータチューニング サンプル import torch from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained( \"ryota39/Phi-3-mini-4k-instruct-dpo\", trust_remote_code=True, ) model = AutoModelForCausalLM.from_pretrained( \"ryota39/Phi-3-mini-4k-instruct-dpo\", device_map=\"auto\", torch_dtype='auto', trust_remote_code=True, ) text = \"<|user|>\\n与えられた質問に対して英語で思考し、日本語で答えてください。", "url": "https://huggingface.co/ryota39/Phi-3-mini-4k-instruct-dpo", "project_name": "Phi-3-mini-4k-instruct-dpo", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-04-24 16:21:32", "latest_commit": "2024-05-01 07:41:46", "languages": [], "model_or_dataset": "model", "model_size": 3.82, "model_architectures": "Phi3ForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Omnia 
2x7B Description This repository hosts Omnia-2x7B, an advanced Japanese language model specifically trained for generating novels.", "url": "https://huggingface.co/Elizezen/Omnia-2x7B", "project_name": "Omnia-2x7B", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-04-23 05:13:33", "latest_commit": "2024-04-23 06:51:55", "languages": [], "model_or_dataset": "model", "model_size": 12.9, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Model Card for Model ID Japanese transcription, testing in progress to see results, main personal use cases are japanese comedy usage 9GB vram with this Lora Model Details Model Description openai-whisper-large-v2-LORA-ja Developed by: FZNX Model type: PEFT LORA Language(s) (NLP):", "url": "https://huggingface.co/fznx92/openai-whisper-large-v2-ja-transcribe-colab", "project_name": "openai-whisper-large-v2-ja-transcribe-colab", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-12-29 19:51:54", "latest_commit": "2023-12-30 05:05:58", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "tohoku-nlp/bert-base-japanese-v3 を RetroMAE で事前学習したモデルです。 ", "url": "https://huggingface.co/hotchpotch/bert-base-japanese-v3-retromae", "project_name": "bert-base-japanese-v3-retromae", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-10-23 14:00:43", "latest_commit": "2024-10-29 23:51:26", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "cl-nagoya/ruri-pt-base を RetroMAE で事前学習したモデルです。 ", "url": "https://huggingface.co/hotchpotch/ruri-pt-base-retromae", "project_name": "ruri-pt-base-retromae", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-10-26 19:40:25", "latest_commit": "2024-10-29 23:52:44", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "◆ArcanaMix 二次元イラストを中心に、かわいいイラストが出力できるように調整したモデルです。 ", "url": "https://huggingface.co/Hemlok/ArcanaMix", "project_name": "ArcanaMix", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-06-30 18:13:36", "latest_commit": "2023-06-30 18:52:16", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Mistral-Nemo-NT-Ko-12B-sft Description Mistral-Nemo-NT-Ko-12B-sft is an instruction-tuned version of mistralai/Mistral-Nemo-Base-2407, fine-tuned across four languages: English, Korean, Chinese, and Japanese.", "url": "https://huggingface.co/werty1248/Mistral-Nemo-NT-Ko-12B-sft", "project_name": "Mistral-Nemo-NT-Ko-12B-sft", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-09-19 05:31:19", "latest_commit": "2024-09-22 09:49:54", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] 
}, { "description": "japanese-splade-base-v1を huggingface/text-embeddings-inferenceで動かすための fork です。 ", "url": "https://huggingface.co/hotchpotch/japanese-splade-base-v1-dummy-fast-tokenizer-for-tei", "project_name": "japanese-splade-base-v1-dummy-fast-tokenizer-for-tei", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-10-07 01:06:05", "latest_commit": "2024-10-07 01:12:44", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ESPnet2 TTS pretrained model kan-bayashi/jsut_tts_train_conformer_fastspeech2_transformer_teacher_raw_phn_jaconv_pyopenjtalk_prosody_train.loss.ave ♻", "url": "https://huggingface.co/espnet/kan-bayashi_jsut_tts_train_conformer_fastspeech2_transformer_teacher_r-truncated-f43d8f", "project_name": "kan-bayashi_jsut_tts_train_conformer_fastspeech2_transformer_teacher_r-truncated-f43d8f", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2021-10-23 20:31:44", "latest_commit": "2021-10-23 16:31:48", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ELYZA-japanese-Llama-2-MoE-2x7B-v0.1-GGUF 概要 Aratako/ELYZA-japanese-Llama-2-MoE-2x7B-v0.1の量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/ELYZA-japanese-Llama-2-MoE-2x7B-v0.1-GGUF", "project_name": "ELYZA-japanese-Llama-2-MoE-2x7B-v0.1-GGUF", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-03-06 15:44:10", "latest_commit": "2024-03-07 13:23:01", "languages": [], "model_or_dataset": "model", "model_size": 11.1, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Swallow-8Bは追加の日本語継続事前学習により日本語が大変流暢なLlama-3派生モデルです。", "url": "https://huggingface.co/aixsatoshi/Meta-Llama-3.1-8B-Instruct-plus-Swallow-b", "project_name": "Meta-Llama-3.1-8B-Instruct-plus-Swallow-b", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-07-24 03:30:51", "latest_commit": "2024-07-24 04:19:42", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "■endlessMixシリーズについて 概要 このモデルはDefactaをベースにした階層マージモデルです。", "url": "https://huggingface.co/teasan/endlessMix", "project_name": "endlessMix", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-04-01 15:34:02", "latest_commit": "2023-05-26 22:50:08", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese-Starling-ChatV-7B このモデルは\"chatntq-ja-7b-v1.0\"をベースにした7Bパラメータの日本語チャットモデルです。", "url": "https://huggingface.co/AbeShinzo0708/Japanese-Starling-ChatV-7B-exl2", "project_name": "Japanese-Starling-ChatV-7B-exl2", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-04-22 09:34:13", "latest_commit": "2024-04-22 09:39:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { 
"description": "TinySlime-1.1B-v1.0 TinySlime は日本語に特化した小規模言語モデルです。 ", "url": "https://huggingface.co/2121-8/TinySlime-1.1B-v1.0", "project_name": "TinySlime-1.1B-v1.0", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-06-14 23:46:31", "latest_commit": "2024-07-02 03:58:25", "languages": [], "model_or_dataset": "model", "model_size": 1.1, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "mlx-community/Llama-3.1-70B-Japanese-Instruct-2407-8bit The Model mlx-community/Llama-3.1-70B-Japanese-Instruct-2407-8bit was converted to MLX format from cyberagent/Llama-3.1-70B-Japanese-Instruct-2407 using mlx-lm version 0.16.1.", "url": "https://huggingface.co/mlx-community/Llama-3.1-70B-Japanese-Instruct-2407-8bit", "project_name": "Llama-3.1-70B-Japanese-Instruct-2407-8bit", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-07-26 13:05:01", "latest_commit": "2024-07-26 14:05:31", "languages": [], "model_or_dataset": "model", "model_size": 19.8, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "rinna-gpt2-xsmall-japanese-ud-causal Model Description", "url": "https://huggingface.co/KoichiYasuoka/rinna-gpt2-xsmall-japanese-ud-causal", "project_name": "rinna-gpt2-xsmall-japanese-ud-causal", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-09-07 07:29:26", "latest_commit": "2024-09-12 22:22:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "sehiro/EvoLLM-JP-A-v1-7B-Q4_K_M-GGUF", "url": "https://huggingface.co/sehiro/EvoLLM-JP-A-v1-7B-Q4_K_M-GGUF", "project_name": "EvoLLM-JP-A-v1-7B-Q4_K_M-GGUF", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-09-15 02:45:03", "latest_commit": "2024-09-15 02:45:22", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "abeja-gpt2-large-japanese-ud-causal Model Description", "url": "https://huggingface.co/KoichiYasuoka/abeja-gpt2-large-japanese-ud-causal", "project_name": "abeja-gpt2-large-japanese-ud-causal", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-09-08 02:06:47", "latest_commit": "2024-09-12 22:35:04", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Example ESPnet2 TTS model kan-bayashi/jsut_transformer_accent_with_pause ♻", "url": "https://huggingface.co/espnet/kan-bayashi_jsut_transformer_accent_with_pause", "project_name": "kan-bayashi_jsut_transformer_accent_with_pause", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2021-07-03 14:53:36", "latest_commit": "2021-07-03 11:04:24", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Model Card for Model ID MMedBench and KoreanMedMCQA Instruction Fine-Tuned 
Multilingual Llama3 8B 4Bit quantized model using QLoRA.", "url": "https://huggingface.co/SpassMedAI/MLMedLlama3", "project_name": "MLMedLlama3", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-08-20 17:08:05", "latest_commit": "2024-08-28 15:39:15", "languages": [], "model_or_dataset": "model", "model_size": 4.65, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "gpt2-medium-japanese-ud-causal Model Description", "url": "https://huggingface.co/KoichiYasuoka/gpt2-medium-japanese-ud-causal", "project_name": "gpt2-medium-japanese-ud-causal", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-08-22 23:01:26", "latest_commit": "2024-08-25 17:55:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "gpt2-large-japanese-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/gpt2-large-japanese-upos", "project_name": "gpt2-large-japanese-upos", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-06-22 22:44:06", "latest_commit": "2024-07-27 07:49:47", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "BERT for Sentiment Analysis of Japanese Twitter", "url": "https://huggingface.co/LoneWolfgang/bert-for-japanese-twitter-sentiment-mixed-label", "project_name": "bert-for-japanese-twitter-sentiment-mixed-label", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-08-09 11:38:05", "latest_commit": "2024-08-09 12:10:35", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "youhansun/Llama-3-70B-japanese-suzume-vector-v0.1-Q2_K-GGUF", "url": "https://huggingface.co/youhansun/Llama-3-70B-japanese-suzume-vector-v0.1-Q2_K-GGUF", "project_name": "Llama-3-70B-japanese-suzume-vector-v0.1-Q2_K-GGUF", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-06-02 04:49:31", "latest_commit": "2024-06-02 04:52:45", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "SambaLingo-Japanese-Chat SambaLingo-Japanese-Chat is a human aligned chat model trained in Japanese and English.", "url": "https://huggingface.co/LoneStriker/SambaLingo-Japanese-Chat-4.0bpw-h6-exl2", "project_name": "SambaLingo-Japanese-Chat-4.0bpw-h6-exl2", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-03-07 06:51:43", "latest_commit": "2024-03-07 06:53:26", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "モデルの説明(English explanation is below.", "url": "https://huggingface.co/keitokei1994/Llama-3-ELYZA-hermes-2x8B", "project_name": "Llama-3-ELYZA-hermes-2x8B", "downloads": 13, "source": 
"Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-06-26 15:11:08", "latest_commit": "2024-06-27 04:00:37", "languages": [], "model_or_dataset": "model", "model_size": 13.7, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Oumuamua-7b-instruct-v2-RP nitky/Oumuamua-7b-instruct-v2をロールプレイ用にLoRAでファインチューニングしたモデルです。 ", "url": "https://huggingface.co/Aratako/Oumuamua-7b-instruct-v2-RP", "project_name": "Oumuamua-7b-instruct-v2-RP", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-06-21 13:51:55", "latest_commit": "2024-06-23 13:55:42", "languages": [], "model_or_dataset": "model", "model_size": 7.33, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "実験モデルです。", "url": "https://huggingface.co/Akimite/Gemma2-9B-it-Boku-v1", "project_name": "Gemma2-9B-it-Boku-v1", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-07-13 13:15:46", "latest_commit": "2024-07-13 13:51:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Gemma2ForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "AutoTrain Dataset for project: tam_jp", "url": "https://huggingface.co/datasets/tamdiep106/autotrain-data-tam_jp", "project_name": "autotrain-data-tam_jp", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-06-23 09:01:33", "latest_commit": "2023-06-23 10:46:11", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Evol-Alpaca-gen3-500 Evol-Alpaca-gen3-500は、", "url": "https://huggingface.co/datasets/HachiML/Evol-Alpaca-gen3-500", "project_name": "Evol-Alpaca-gen3-500", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-05-12 11:13:09", "latest_commit": "2024-05-20 01:43:43", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "自動生成Q&A データソースから、MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-GGUFを使ってQ&Aを自動生成したものです。 チームで作成したデータおよび「Common Crawlをもとに生成しています。 ", "url": "https://huggingface.co/datasets/hatakeyama-llm-team/AutoGeneratedJapaneseQA-other", "project_name": "AutoGeneratedJapaneseQA-other", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-05-19 02:45:59", "latest_commit": "2024-05-19 14:17:58", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "必ずすべての情報を網羅し、日本語で出力すること。", "url": "https://huggingface.co/datasets/RJZ/ConceptNetSyntheticPhi3Text_ja", "project_name": "ConceptNetSyntheticPhi3Text_ja", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-06-20 12:23:49", "latest_commit": "2024-06-21 14:22:03", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "The dataset of SLG framework.", "url": 
"https://huggingface.co/datasets/ganchengguang/Sentence-Classification-and-NER-Mix-Datasets-SCNM", "project_name": "Sentence-Classification-and-NER-Mix-Datasets-SCNM", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-10-18 14:48:29", "latest_commit": "2024-10-18 14:54:38", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Information Retrieval", "Named Entity Recognition", "Text Classification" ] }, { "description": "Dataset containing ~3000 synthetically generated (by GPT-4o-mini) children's stories in Japanese that only use simple words.", "url": "https://huggingface.co/datasets/kai271/TinyStories-Japanese", "project_name": "TinyStories-Japanese", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-10-13 12:38:07", "latest_commit": "2024-10-13 12:44:19", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Generation", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "合成日本語指示データセット 概要 このデータセットは、大規模言語モデル(LLM)を用いて自動生成された日本語の指示とそれに対する推論・初期応答・改善応答のコレクションです。", "url": "https://huggingface.co/datasets/DeL-TaiseiOzaki/magpie-reasonig-ja-qwen2.5-72b-16k", "project_name": "magpie-reasonig-ja-qwen2.5-72b-16k", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-10-28 01:32:23", "latest_commit": "2024-11-19 15:30:41", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japaneseを用いて、英語Wikipedia中のテキストを日本語に翻訳したデータセットです。 ", "url": "https://huggingface.co/datasets/hpprc/ja-en-r1-distill-qwen", "project_name": "ja-en-r1-distill-qwen", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2025-01-27 12:19:47", "latest_commit": "2025-01-27 12:30:46", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "試験的なデータセットとなります 評価用データセットの作成方法 ChatGPT-4oで弁理士紹介を含む5つの観点から50のクエリを生成 ChatGPT-4oでクエリに対する回答と評価ポイントを生成※弁理士を直接紹介する10問は除く 除外した10問に関しては、開放特許情報データベース及びj-platpatの公開データより回答を手動作成", "url": "https://huggingface.co/datasets/Chasottco/Japanese-patent-evaluation-dataset-01", "project_name": "Japanese-patent-evaluation-dataset-01", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2025-01-01 14:38:29", "latest_commit": "2025-01-01 15:16:47", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Dialogue Systems & Conversational Agents", "Language Models" ] }, { "description": "以下の条件に同意したうえで、公開されたモデル及びデータセット等(以下「本コンテンツ」)といいます)をダウンロードします。 ", "url": "https://huggingface.co/datasets/weblab-GENIAC/OpenBookQA-Japanese-masked", "project_name": "OpenBookQA-Japanese-masked", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": 
"Lurunchik/WikiHowNFQAを日本語に翻訳したデータセットです。", "url": "https://huggingface.co/datasets/GENIAC-Team-Ozaki/WikiHowNFQA-ja", "project_name": "WikiHowNFQA-ja", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-05-06 05:07:10", "latest_commit": "2024-05-06 05:40:54", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces" ] }, { "description": "データセットの各キーとその説明: state_id: ゲームの状態を一意に識別するためのID。 ", "url": "https://huggingface.co/datasets/MilosNaniwa/WarChestDojo", "project_name": "WarChestDojo", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-04-17 09:47:35", "latest_commit": "2024-04-18 10:31:21", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "This dataset is based on the Japanese version of Wikipedia dataset and converted into a multi-turn conversation format using llama2Pro8B. ", "url": "https://huggingface.co/datasets/shi3z/ja_conv_wikipedia_llama2pro8b_20k", "project_name": "ja_conv_wikipedia_llama2pro8b_20k", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-01-20 01:15:28", "latest_commit": "2024-01-20 01:17:11", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "Negative Embedding / Textual Inversion NE4Mitsua is a Negative Embedding for Mitsua Diffusion One.", "url": "https://huggingface.co/datasets/R1b3y/NE4Mitsua", "project_name": "NE4Mitsua", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-05-05 00:40:46", "latest_commit": "2023-05-05 09:49:45", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "Aya_ja このデータセットはCohereForAI/aya_datasetの日本語インストラクションデータのみを抽出したデータセットです。 ", "url": "https://huggingface.co/datasets/ryota39/Aya_ja", "project_name": "Aya_ja", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-02-14 08:03:42", "latest_commit": "2024-02-14 08:25:06", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "Japanese multi-turn conversation data was generated using Qarasu14B based on Wikipedia data.", "url": "https://huggingface.co/datasets/shi3z/Qarasu_Wikipedia_multiturn_human_gpt_10K", "project_name": "Qarasu_Wikipedia_multiturn_human_gpt_10K", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-01-07 02:15:58", "latest_commit": "2024-01-07 04:26:40", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "DSR1D-qwen-2.5-32B-aya-ja-1k-generated 
これはdeepseek-ai/DeepSeek-R1-Distill-Qwen-32Bを用いて、weblab-GENIAC/aya-ja-evol-instruct-calm3-dpo-maskedの最初の1000件の応答をmax_new_tokens=3060で生成させました。 ", "url": "https://huggingface.co/datasets/kurogane/DSR1D-qwen-2.5-32B-aya-ja-1k-generated", "project_name": "DSR1D-qwen-2.5-32B-aya-ja-1k-generated", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2025-01-24 12:46:04", "latest_commit": "2025-01-24 12:49:50", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Text Generation", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Abstract This is a multi-turn conversation dataset generated from the Japanese Wikipedia dataset using Orion14B-Chat.", "url": "https://huggingface.co/datasets/shi3z/ja_conv_wikipedia_orion14B_10K", "project_name": "ja_conv_wikipedia_orion14B_10K", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2024-01-25 20:30:08", "latest_commit": "2024-01-25 20:56:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "⚠", "url": "https://huggingface.co/datasets/hotchpotch/jaqket_v1_qa_wikija_context", "project_name": "jaqket_v1_qa_wikija_context", "downloads": 13, "source": "Hugging Face", "score": -0.052012666089175645, "first_commit": "2023-12-07 08:36:30", "latest_commit": "2024-02-25 06:14:45", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "DataPilot/sarashina2.2-3Bx4-moe DataPilot/sarashina2.2-3Bx4-moeは、4つの「sbintuitions/sarashina2.2-3b-instruct-v0.1」モデルを統合して作成した約12Bパラメータ規模のMixture of Experts (MoE) モデルです。", "url": "https://huggingface.co/DataPilot/sarashina2.2-3Bx4-moe", "project_name": "sarashina2.2-3Bx4-moe", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2025-03-08 16:03:52", "latest_commit": "2025-03-08 16:15:20", "languages": [], "model_or_dataset": "model", "model_size": 7.76, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "bert-base-japanese-char-extended Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-base-japanese-char-extended", "project_name": "bert-base-japanese-char-extended", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2021-08-26 22:44:12", "latest_commit": "2022-06-21 07:21:54", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "ELECTRA base Japanese generator This is an ELECTRA model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/electra-base-japanese-generator", "project_name": "electra-base-japanese-generator", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2021-11-15 17:23:50", "latest_commit": "2023-10-21 13:21:16", "languages": [], "model_or_dataset": "model", "model_size": 0.035500000000000004, "model_architectures": 
"ElectraForMaskedLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "ESを書くAI Japanese GPT-2 modelをファインチューニングしました。 ", "url": "https://huggingface.co/huranokuma/es_IT", "project_name": "es_IT", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2022-08-13 06:48:58", "latest_commit": "2022-08-14 05:47:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "ELECTRA Base Japanese for Information Triage", "url": "https://huggingface.co/y-oikawa/Information-triage-for-disaster-tweets", "project_name": "Information-triage-for-disaster-tweets", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-01-05 04:58:44", "latest_commit": "2023-01-05 06:18:49", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForSequenceClassification", "multi_labels": [ "Language Models" ] }, { "description": "japanese-large-lm-3.6b-instruction-sft-8bit-1g-actorder_True", "url": "https://huggingface.co/line-corporation/japanese-large-lm-3.6b-instruction-sft-8bit-1g-actorder_True", "project_name": "japanese-large-lm-3.6b-instruction-sft-8bit-1g-actorder_True", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-09-26 06:16:23", "latest_commit": "2023-09-28 00:02:06", "languages": [], "model_or_dataset": "model", "model_size": 1.17, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "t5-base-xlsum-ja", "url": "https://huggingface.co/p1atdev/t5-base-xlsum-ja", "project_name": "t5-base-xlsum-ja", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-10-06 03:18:28", "latest_commit": "2023-11-20 09:25:16", "languages": [], "model_or_dataset": "model", "model_size": 0.248, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-beta-7B-GPTQ", "project_name": "japanese-stablelm-instruct-beta-7B-GPTQ", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-11-03 10:31:29", "latest_commit": "2023-11-03 12:54:41", "languages": [], "model_or_dataset": "model", "model_size": 1.13, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "DeBERTa V2 small Japanese This is a DeBERTaV2 model pretrained on Japanese texts.", "url": "https://huggingface.co/izumi-lab/deberta-v2-small-japanese", "project_name": "deberta-v2-small-japanese", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-10-21 13:24:28", "latest_commit": "2024-07-19 03:08:14", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese BERT-base (Vaporetto + Unigram)", "url": 
"https://huggingface.co/hitachi-nlp/bert-base-japanese_vaporetto-unigram", "project_name": "bert-base-japanese_vaporetto-unigram", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-06-14 07:19:04", "latest_commit": "2023-06-16 01:05:55", "languages": [], "model_or_dataset": "model", "model_size": 0.109, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese BERT-base (Vaporetto + BPE) How to load the tokenizer Please download the dictionary file for Vaporetto + BPE from our GitHub repository.", "url": "https://huggingface.co/hitachi-nlp/bert-base-japanese_vaporetto-bpe", "project_name": "bert-base-japanese_vaporetto-bpe", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-06-14 07:18:12", "latest_commit": "2023-06-16 01:05:21", "languages": [], "model_or_dataset": "model", "model_size": 0.109, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese BERT-base (Sudachi + BPE) How to load the tokenizer Please download the dictionary file for Sudachi + BPE from our GitHub repository.", "url": "https://huggingface.co/hitachi-nlp/bert-base-japanese_sudachi-bpe", "project_name": "bert-base-japanese_sudachi-bpe", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-06-14 07:15:42", "latest_commit": "2023-06-16 01:03:32", "languages": [], "model_or_dataset": "model", "model_size": 0.109, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Responsible & Trustworthy NLP", "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese BERT-base (Juman++ + WordPiece) How to load the tokenizer Please download the dictionary file for Juman++ +", "url": "https://huggingface.co/hitachi-nlp/bert-base-japanese_jumanpp-wordpiece", "project_name": "bert-base-japanese_jumanpp-wordpiece", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-06-14 07:02:52", "latest_commit": "2023-06-16 01:03:10", "languages": [], "model_or_dataset": "model", "model_size": 0.109, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese BERT-base (MeCab + WordPiece) How to load the tokenizer Please download the dictionary file for MeCab + WordPiece from our GitHub repository.", "url": "https://huggingface.co/hitachi-nlp/bert-base-japanese_mecab-wordpiece", "project_name": "bert-base-japanese_mecab-wordpiece", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-06-14 06:59:57", "latest_commit": "2023-06-16 01:01:56", "languages": [], "model_or_dataset": "model", "model_size": 0.109, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "このモデルはdeberta-v2-tiny-japaneseをファインチューニングしてQAタスクに用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/deberta-v2-tiny-japanese-finetuned-QA", "project_name": 
"deberta-v2-tiny-japanese-finetuned-QA", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-05-11 10:34:38", "latest_commit": "2023-05-11 10:38:32", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForQuestionAnswering", "multi_labels": [ "Natural Language Interfaces", "Question Answering", "Language Models", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "NLLB-200 1.3B fine-tuned on Ascendance of a Bookworm", "url": "https://huggingface.co/thefrigidliquidation/nllb-200-distilled-1.3B-bookworm", "project_name": "nllb-200-distilled-1.3B-bookworm", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2022-07-27 20:39:08", "latest_commit": "2024-04-14 18:45:22", "languages": [], "model_or_dataset": "model", "model_size": 1.37, "model_architectures": "M2M100ForConditionalGeneration", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "deberta-base-japanese-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-luw-upos", "project_name": "deberta-base-japanese-luw-upos", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2022-05-24 06:55:47", "latest_commit": "2024-08-20 19:21:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Information Retrieval", "Syntactic Parsing", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "TakoMT", "url": "https://huggingface.co/staka/takomt", "project_name": "takomt", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2022-05-08 03:52:40", "latest_commit": "2023-08-15 17:32:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MarianMTModel", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models", "Semantic Text Processing" ] }, { "description": "タイトルから記事本文を生成するモデル SEE: https://qiita.com/sonoisa/items/a9af64ff641f0bbfed44", "url": "https://huggingface.co/sonoisa/t5-base-japanese-article-generation", "project_name": "t5-base-japanese-article-generation", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2021-04-03 13:55:25", "latest_commit": "2024-04-17 11:39:12", "languages": [], "model_or_dataset": "model", "model_size": 0.223, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese-LLaMA-3-8B-Instruct-v2 Japanese-LLaMA-3-8B-Instruct-v2は指示実行モデル、フルモデルです。 ", "url": "https://huggingface.co/owner203/japanese-llama-3-8b-instruct-v2", "project_name": "japanese-llama-3-8b-instruct-v2", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-06-10 10:10:19", "latest_commit": "2024-06-21 06:35:31", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM", "multi_labels": [] }, { "description": "ELECTRA small Japanese finance discriminator This is a ELECTRA model pretrained on texts in the Japanese language.", "url": 
"https://huggingface.co/izumi-lab/electra-small-paper-japanese-fin-discriminator", "project_name": "electra-small-paper-japanese-fin-discriminator", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2021-10-04 13:35:33", "latest_commit": "2022-12-09 00:39:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForPreTraining", "multi_labels": [ "Language Models" ] }, { "description": "deberta-base-japanese-wikipedia-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-wikipedia-ud-head", "project_name": "deberta-base-japanese-wikipedia-ud-head", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2022-06-25 13:03:09", "latest_commit": "2024-08-20 19:47:27", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForQuestionAnswering", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Semantic Text Processing" ] }, { "description": "This pre-trained model is work in progress!", "url": "https://huggingface.co/naclbit/gpt-j-japanese-6.8b", "project_name": "gpt-j-japanese-6.8b", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2021-10-17 08:02:54", "latest_commit": "2021-11-10 15:28:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTJForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "bert-large-japanese-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-large-japanese-upos", "project_name": "bert-large-japanese-upos", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2021-08-19 10:39:38", "latest_commit": "2022-09-18 19:43:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Information Retrieval", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1 japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1 is a merge of the following models: mistralai/Mistral-7B-Instruct-v0.1 stabilityai/japanese-stablelm-base-gamma-7b 🧩 Configuration slices: - sources: - model: mistralai/Mistral-7B-Instruct-v0.1 layer_range:", "url": "https://huggingface.co/MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1", "project_name": "japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-01-17 04:41:20", "latest_commit": "2024-01-17 04:46:18", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "roberta-large-japanese-juman-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-large-japanese-juman-ud-goeswith", "project_name": "roberta-large-japanese-juman-ud-goeswith", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-02-21 06:38:32", "latest_commit": "2024-08-30 14:49:26", "languages": [], "model_or_dataset": 
"model", "model_size": null, "model_architectures": "RobertaForTokenClassification", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "Bloom model trained on Japanese corpus.", "url": "https://huggingface.co/Aruno/Bloom-JP-160m", "project_name": "Bloom-JP-160m", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-04-24 04:47:05", "latest_commit": "2023-04-24 05:12:10", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BloomForCausalLM", "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "LINE DistilBERT Japanese (forked by liwii)", "url": "https://huggingface.co/liwii/line-distilbert-base-japanese-fork", "project_name": "line-distilbert-base-japanese-fork", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-12-01 08:26:36", "latest_commit": "2023-12-01 09:16:46", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DistilBertForMaskedLM", "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "https://qiita.com/SousiOmine/items/23313089c7c3f498996b 概要 sbintuitions/sarashina2.2-3b-instruct-v0.1に、 Kendamarron/jimba-instruction-allとSousiOmine/Japanese-Pythonic-FunctionCallを用いたQLoRAファインチューニングを行い、 python関数の呼び出しに対応させたモデルです。 ", "url": "https://huggingface.co/SousiOmine/sarashina2.2-3b-instruct-v0.1-Pythonic-FunctionCall", "project_name": "sarashina2.2-3b-instruct-v0.1-Pythonic-FunctionCall", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2025-03-10 03:21:38", "latest_commit": "2025-03-10 07:29:46", "languages": [], "model_or_dataset": "model", "model_size": 3.36, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "本モデルは、SakanaAI/TinySwallow-1.5B-Instructに対して、GRPOにより高橋メソッドのスライド生成に特化した追加学習を施したものです。", "url": "https://huggingface.co/Atotti/TinySwallow-GRPO-TakahashiMethod-v0.2", "project_name": "TinySwallow-GRPO-TakahashiMethod-v0.2", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2025-03-03 06:20:40", "latest_commit": "2025-03-03 11:24:03", "languages": [], "model_or_dataset": "model", "model_size": 1.54, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "調整したい方向とはだいぶ異なる方向にずれた。", "url": "https://huggingface.co/Akimite/Gemma2-9b-it-Youki-v1", "project_name": "Gemma2-9b-it-Youki-v1", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2025-01-24 05:59:38", "latest_commit": "2025-01-24 09:28:24", "languages": [], "model_or_dataset": "model", "model_size": 9.24, "model_architectures": "Gemma2ForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "概要 このモデルはQwen/Qwen2.5-32BをファインチューニングしたAbeja社のベースモデルにDeepSeek社のR1蒸留モデルであるdeepseek-ai/DeepSeek-R1-Distill-Qwen-32Bを日本語ファインチューニングしたcyber agent社のcyberagent/DeepSeek-R1-Distill-Qwen-32B-JapaneseをChatVectorを用いて加えたものに、独自の日本語強化ファインチューニングをしたモデルとなります。 ", "url": "https://huggingface.co/DataPilot/Arrival-32B-Instruct-v0.4", "project_name": "Arrival-32B-Instruct-v0.4", 
"downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2025-01-27 16:28:01", "latest_commit": "2025-01-27 16:45:56", "languages": [], "model_or_dataset": "model", "model_size": 32.8, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models" ] }, { "description": "名言推論モデル", "url": "https://huggingface.co/Momerio/meigen_generate_Japanese", "project_name": "meigen_generate_Japanese", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2021-10-13 15:30:14", "latest_commit": "2021-10-26 01:19:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "ebisuke/liz-nojaloli-nxja-ja License MIT ベースとしてabeja/gpt-neox-japanese-2.7bを使用しています。 ", "url": "https://huggingface.co/ebisuke/liz-nojaloli-nxja-ja", "project_name": "liz-nojaloli-nxja-ja", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-06-02 02:12:20", "latest_commit": "2023-06-09 02:07:42", "languages": [], "model_or_dataset": "model", "model_size": 2.6, "model_architectures": "GPTNeoXJapaneseForCausalLM", "multi_labels": [ "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese BERT-base (Sudachi + Unigram)", "url": "https://huggingface.co/hitachi-nlp/bert-base-japanese_sudachi-unigram", "project_name": "bert-base-japanese_sudachi-unigram", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-06-14 07:16:29", "latest_commit": "2023-06-16 01:03:54", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Model description Cyberagent様のcyberagent/calm2-7b-chatを追加学習した、作家さん用アシスタントAIのアルファ版です。 ", "url": "https://huggingface.co/falche/opennovel_oc2_01a_7b", "project_name": "opennovel_oc2_01a_7b", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-12-09 13:36:32", "latest_commit": "2023-12-09 15:19:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Dialogue Response Generation" ] }, { "description": "ChatNTQ JA 7B V1.0 Model Description", "url": "https://huggingface.co/NTQAI/chatntq-ja-7b-v1.0", "project_name": "chatntq-ja-7b-v1.0", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-12-26 06:22:59", "latest_commit": "2023-12-26 09:22:34", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation" ] }, { "description": "This model is traned with llm-japanese-dataset dataset.", "url": "https://huggingface.co/ganchengguang/Yoko_13B_Japanese_QLoRA", "project_name": "Yoko_13B_Japanese_QLoRA", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-08-17 16:26:52", "latest_commit": "2023-08-17 16:51:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", 
"multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "This model is traned with guanaco dataset.", "url": "https://huggingface.co/ganchengguang/Yoko-7B-Japanese-v0", "project_name": "Yoko-7B-Japanese-v0", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-08-09 16:28:38", "latest_commit": "2023-08-10 13:00:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Text Style Transfer" ] }, { "description": "モデルの概略 霧雨魔理沙とおしゃべりできるモデルです。 ", "url": "https://huggingface.co/tsukemono/japanese-stablelm-base-alpha-7b-f16-marisa", "project_name": "japanese-stablelm-base-alpha-7b-f16-marisa", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-08-28 13:10:57", "latest_commit": "2023-09-01 10:26:02", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "JapaneseStableLMAlphaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "モデルの概略 東方Projectのキャラクターである霧雨魔理沙とおしゃべりできるモデルです。 ", "url": "https://huggingface.co/tsukemono/japanese-novel-gpt-j-6b-f16-marisa", "project_name": "japanese-novel-gpt-j-6b-f16-marisa", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-09-02 15:15:51", "latest_commit": "2023-09-06 18:42:50", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTJForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "OpenCALM-LARGE Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by CyberAgent, Inc. 
", "url": "https://huggingface.co/Mizuiro-sakura/open-calm-large-finetuned-databricks-dolly", "project_name": "open-calm-large-finetuned-databricks-dolly", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-06-26 13:37:53", "latest_commit": "2023-07-02 14:30:47", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese BERT-base (Nothing + BPE) How to load the tokenizer Please download the dictionary file for Nothing + BPE from our GitHub repository.", "url": "https://huggingface.co/hitachi-nlp/bert-base-japanese_nothing-bpe", "project_name": "bert-base-japanese_nothing-bpe", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-06-14 08:06:50", "latest_commit": "2023-06-16 01:06:43", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Responsible & Trustworthy NLP", "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "MambaSan-130m-instruct 🐍 MambaSan-instruct is the first chat Japanese language model based on a state-space model architecture (Mamba), not a transformer.", "url": "https://huggingface.co/loiccabannes/MambaSan-130m-instruct", "project_name": "MambaSan-130m-instruct", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-01-30 23:20:00", "latest_commit": "2024-02-06 23:48:11", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Model Card for Model ID Original model elyza/ELYZA-japanese-Llama-2-7b-instruct which is based on Meta's \"Llama 2\" and has undergone additional pre-training in Japanese instruction.", "url": "https://huggingface.co/dahara1/ELYZA-japanese-Llama-2-7b-instruct-AWQ", "project_name": "ELYZA-japanese-Llama-2-7b-instruct-AWQ", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-09-08 08:35:31", "latest_commit": "2023-09-17 04:24:55", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "ELYZA-japanese-Llama-2-MoE-2x13B-v0.1-GGUF 概要 Aratako/ELYZA-japanese-Llama-2-MoE-2x13B-v0.1の量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/ELYZA-japanese-Llama-2-MoE-2x13B-v0.1-GGUF", "project_name": "ELYZA-japanese-Llama-2-MoE-2x13B-v0.1-GGUF", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-03-03 12:51:40", "latest_commit": "2024-03-03 13:39:01", "languages": [], "model_or_dataset": "model", "model_size": 21.5, "model_architectures": null, "multi_labels": [ "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "The English document is here モデル概要 Watashiha-Llama-2-13B-Ogiri-sftをAWSのinf2インスタンスで動作するようにコンパイルされたモデルです。 ", "url": "https://huggingface.co/watashiha/Watashiha-Llama-2-13B-Ogiri-sft-neuron", "project_name": "Watashiha-Llama-2-13B-Ogiri-sft-neuron", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, 
"first_commit": "2024-01-24 19:05:34", "latest_commit": "2024-02-02 06:39:21", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Multilinguality" ] }, { "description": "Japanese-Alpaca-2-13B Japanese-Alpaca-2-13Bは指示実行モデル、フルモデルです。 ", "url": "https://huggingface.co/owner203/japanese-alpaca-2-13b", "project_name": "japanese-alpaca-2-13b", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-12-20 10:55:29", "latest_commit": "2023-12-26 11:40:50", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese Stable LM Instruct Gamma 7B +", "url": "https://huggingface.co/ohwi/japanese-stablelm-instruct-gamma-7b-dpo-uf-v0", "project_name": "japanese-stablelm-instruct-gamma-7b-dpo-uf-v0", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-12-21 14:10:58", "latest_commit": "2023-12-23 06:53:31", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models" ] }, { "description": "Shisa 7B Shisa 7B (shisa-7b-v1)", "url": "https://huggingface.co/LoneStriker/shisa-7b-v1-5.0bpw-h6-exl2", "project_name": "shisa-7b-v1-5.0bpw-h6-exl2", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-12-07 18:07:21", "latest_commit": "2023-12-07 18:54:27", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Multilinguality", "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "ELYZA-japanese-CodeLlama-7b-instruct-GPTQ-calib-ja-1k elyzaさんが公開しているELYZA-japanese-CodeLlama-7b-instructを 日本語のキャリブレーションセットで生成したGPTQモデルになります。 ", "url": "https://huggingface.co/mmnga/ELYZA-japanese-CodeLlama-7b-instruct-GPTQ-calib-ja-1k", "project_name": "ELYZA-japanese-CodeLlama-7b-instruct-GPTQ-calib-ja-1k", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-11-15 16:33:25", "latest_commit": "2023-11-16 14:28:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Language Models" ] }, { "description": "modernbert-base-japanese-char Model Description", "url": "https://huggingface.co/KoichiYasuoka/modernbert-base-japanese-char", "project_name": "modernbert-base-japanese-char", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2025-02-18 15:23:19", "latest_commit": "2025-02-26 22:29:55", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ModernBertForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Semantic Text Processing" ] }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/zaq-hack/Orion-14B-LongChat-bpw600-h6-exl2", "project_name": "Orion-14B-LongChat-bpw600-h6-exl2", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-01-25 16:58:27", "latest_commit": "2024-01-25 21:01:29", "languages": [], "model_or_dataset": "model", 
"model_size": null, "model_architectures": "OrionForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Our Models Vecteus Ninja-v1 Ninja-v1-NSFW Ninja-v1-128k Ninja-v1-NSFW-128k Model Card for Ninja-v1-128k The Mistral-7B--based Large Language Model (LLM) is an noveldataset fine-tuned version of the Mistral-7B-v0.1 Ninja-128k has the following changes compared to Mistral-7B-v0.1.", "url": "https://huggingface.co/Local-Novel-LLM-project/Ninja-v1-128k", "project_name": "Ninja-v1-128k", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-05-01 02:56:38", "latest_commit": "2024-05-04 04:07:00", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "karakuri-MS-01 モデルの詳細は、こちらです。", "url": "https://huggingface.co/sbtom/karakuri-MS-01", "project_name": "karakuri-MS-01", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-04-18 22:46:13", "latest_commit": "2024-04-19 00:32:29", "languages": [], "model_or_dataset": "model", "model_size": 69.2, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Style-Bert-VITS2 Japanese Only Sakura Miko こちらは「さくらみこ」の音声データセットに基づいて学習されたVITS-TTSモデルです。 ", "url": "https://huggingface.co/Lycoris53/style-bert-vits2-sakura-miko", "project_name": "style-bert-vits2-sakura-miko", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-05-27 14:58:38", "latest_commit": "2024-05-28 03:02:14", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "https://huggingface.co/llm-book/bert-base-japanese-v3-marc_ja with ONNX weights to be compatible with Transformers PHP bert-base-japanese-v3-marc_ja 「大規模言語モデル入門」の第5章で紹介している(感情分析)のモデルです。 ", "url": "https://huggingface.co/masato12/bert-base-japanese-v3-marc_ja", "project_name": "bert-base-japanese-v3-marc_ja", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-07-14 22:31:05", "latest_commit": "2024-07-14 22:33:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "m2v-LaBSE-distilled Model Card", "url": "https://huggingface.co/hs-hf/m2v-LaBSE-distilled", "project_name": "m2v-LaBSE-distilled", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-10-15 05:18:39", "latest_commit": "2024-10-15 05:35:04", "languages": [], "model_or_dataset": "model", "model_size": 0.192, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "このモデルは、text-embeddings-inference (TEI) で、mecab / unidic などを用いた日本語Tokenizerのモデルを、dummy の tokenizer.json を用いて無理やり動かす 方法のサンプルです。", "url": "https://huggingface.co/hotchpotch/ruri-base-dummy-fast-tokenizer-for-tei", "project_name": "ruri-base-dummy-fast-tokenizer-for-tei", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-09-30 06:25:36", "latest_commit": "2024-09-30 07:19:13", "languages": [], 
"model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ChatGLM3-6B是一个中英双语大模型,本项目为ChatGLM3-6B加入日文能力。", "url": "https://huggingface.co/dummy-foo/ChatGLM3-Japanese", "project_name": "ChatGLM3-Japanese", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-05-31 22:51:11", "latest_commit": "2024-06-09 15:37:04", "languages": [], "model_or_dataset": "model", "model_size": 6.35, "model_architectures": "ChatGLMForConditionalGeneration", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Llama-3.1-70B-EZO-1.1-it-gguf HODACHIさんが公開しているHODACHI/Llama-3.1-70B-EZO-1.1-itのgguf版です。", "url": "https://huggingface.co/keitokei1994/Llama-3.1-70B-EZO-1.1-it-GGUF", "project_name": "Llama-3.1-70B-EZO-1.1-it-GGUF", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-08-01 01:18:06", "latest_commit": "2024-08-01 02:25:32", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ebisuke/liz-nojaloli-ja-ds License MIT License Description ebisuke/liz-nojaloli-jaの学習元のデータセットです。", "url": "https://huggingface.co/datasets/ebisuke/liz-nojaloli-ja-ds", "project_name": "liz-nojaloli-ja-ds", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-05-23 06:24:13", "latest_commit": "2023-08-17 09:57:13", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Overview This dataset is edited from kunishou/databricks-dolly-15k-en.", "url": "https://huggingface.co/datasets/WarriorMama777/databricks-dolly-15k-ja_cool", "project_name": "databricks-dolly-15k-ja_cool", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-07-08 06:16:29", "latest_commit": "2023-07-08 15:45:18", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Emotion Analysis", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "oasst1-89k-ja , databricks-dolly-15k-ja , hh-rlhf-49k-ja の中から JGLUE( JcommonsenseQA , MARC-ja , JSQuAD )の観点で高品質なデータセットに絞り込んだデータセットです。 ", "url": "https://huggingface.co/datasets/kunishou/jp-effective-instructions", "project_name": "jp-effective-instructions", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2023-12-01 07:36:25", "latest_commit": "2023-12-01 07:52:41", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "このデータセットは「私の推しは悪役令嬢。", "url": "https://huggingface.co/datasets/aipracticecafe/wataoshi-dialogues-rp", "project_name": "wataoshi-dialogues-rp", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-04-21 16:36:09", "latest_commit": "2024-04-26 10:49:19", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "ポケモン(VGC)のレギュレーションF ルールにおける選出データを記録したデータセットです。 ", "url": 
"https://huggingface.co/datasets/fufufukakaka/pokemon_battle_team_dataset_regulation_f", "project_name": "pokemon_battle_team_dataset_regulation_f", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-06-08 08:26:54", "latest_commit": "2024-06-09 07:16:16", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "#Dataset Card for DancingPrismPJ/wikipedia-horse-dataset Wikipediaの「Category:日本調教の競走馬」内の記事に固有表現ラベルを付与したデータセットです。 ", "url": "https://huggingface.co/datasets/DancingPrismPJ/Wikipedia-Horse-Dataset", "project_name": "Wikipedia-Horse-Dataset", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2025-01-16 04:41:42", "latest_commit": "2025-01-20 03:25:50", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "ime-and-kakko elyza/ELYZA-tasks-100 中の IME のように変換候補を提示するタスク カッコの対応関係を整えるタスク のバリエーションを手で作成したもの 東京大学松尾・岩澤研究室(松尾研)大規模言語モデル Deep Learning 応用講座 2024 で開催されたコンペティションにおいて、 @pokutuna が作成したモデルの苦手問題を克服するために作成しました。", "url": "https://huggingface.co/datasets/pokutuna/tasks-ime-and-kakko-jp", "project_name": "tasks-ime-and-kakko-jp", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2025-01-13 14:34:01", "latest_commit": "2025-01-13 14:40:46", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Nhentai Dataset A collection of Japanese manga in CBZ format from Nhentai, containing adult content manga with associated metadata.", "url": "https://huggingface.co/datasets/infinity-blackhole/nhentai", "project_name": "nhentai", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2025-02-08 15:37:40", "latest_commit": "2025-02-08 17:47:54", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "Magpie-Qwen-Turbo-27k Aratako/Magpie-Tanuki-8B-annotated-96k のアノテーションを利用して件数を減らし、outputをqwen-2.5-turboで再生成したSFT用の26728件のサブセットです。 ", "url": "https://huggingface.co/datasets/hama-jp/magpie-qwen-turbo-27k", "project_name": "magpie-qwen-turbo-27k", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-12-24 15:26:04", "latest_commit": "2024-12-24 16:18:58", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "TALPCoデータセットの日英翻訳ペアをHuggingFace形式に変換したデータセットです。", "url": "https://huggingface.co/datasets/hpprc/TALPCo", "project_name": "TALPCo", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-12-01 09:31:51", "latest_commit": "2024-12-01 09:37:29", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Annotation and Dataset Development" ] }, { "description": "A more aggressively cleaned up version of 
Calvin-Xu/Furigana-Aozora-Speech, which consists of 2,536,041 out of the 3,361,443 entries generated from the raw data 青空文庫及びサピエの音声デイジーデータから作成した振り仮名注釈付き音声コーパスのデータセット https://github.com/ndl-lab/hurigana-speech-corpus-aozora.", "url": "https://huggingface.co/datasets/Calvin-Xu/FLFL-Aozora-Speech-Train", "project_name": "FLFL-Aozora-Speech-Train", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-08-01 21:09:24", "latest_commit": "2024-08-22 01:27:38", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Speech & Audio in NLP" ] }, { "description": "各レコードのurl列が出典となります。", "url": "https://huggingface.co/datasets/numad/yuho-text-2014-2022", "project_name": "yuho-text-2014-2022", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-06-15 00:24:41", "latest_commit": "2024-06-15 00:29:42", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Annotation and Dataset Development" ] }, { "description": "概要 reazon-research/reazonspeech-v2[all]をWADA SNRにて音声品質の分析を行った結果です。 ", "url": "https://huggingface.co/datasets/ayousanz/reazon-speech-v2-all-WAND-SNR-analyze", "project_name": "reazon-speech-v2-all-WAND-SNR-analyze", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-03-31 14:13:56", "latest_commit": "2024-04-03 12:31:56", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Veterinary Medicine Japanese Dataset This dataset contains audio files of veterinary medicine terms in Japanese, categorized into drugs, diseases, and symptoms.", "url": "https://huggingface.co/datasets/BigleBomb/japanese-vet-terms", "project_name": "japanese-vet-terms", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Extraction & Text Mining", "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "LLM-jp Corpus v3の日本語部分のwikipedia以外のミラーです。 ", "url": "https://huggingface.co/datasets/kajuma/llm-jp-corpus-v3-ja", "project_name": "llm-jp-corpus-v3-ja", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Synthetic-Japanese-Roleplay-gpt-4o-mini-19.8k-formatted 概要 gpt-4o-miniを用いて作成した日本語ロールプレイデータセットであるAratako/Synthetic-Japanese-Roleplay-gpt-4o-mini-19.8kにsystem messageを追加して整形したデータセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Synthetic-Japanese-Roleplay-gpt-4o-mini-19.8k-formatted", "project_name": "Synthetic-Japanese-Roleplay-gpt-4o-mini-19.8k-formatted", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-08-16 16:46:06", "latest_commit": "2024-08-16 16:54:03", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and 
Dataset Development" ] }, { "description": "kunishou/OpenMathInstruct-1-1.8m-ja のquestion_jaをもとにphi-3-mediumによりプログラミング言語を用いない形式で生成したデータセットです。", "url": "https://huggingface.co/datasets/misdelivery/OpenMathInstruct-ja-phi-3-medium-test", "project_name": "OpenMathInstruct-ja-phi-3-medium-test", "downloads": 12, "source": "Hugging Face", "score": -0.05201976521487016, "first_commit": "2024-05-30 11:34:11", "latest_commit": "2024-05-31 12:00:34", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese-Novel-Reward-TinySwallow-1.5B このモデルはSakanaAI/TinySwallow-1.5Bをファインチューニングして作成された日本語小説の品質評価のためのRewardモデルです。 ", "url": "https://huggingface.co/Aratako/Japanese-Novel-Reward-TinySwallow-1.5B", "project_name": "Japanese-Novel-Reward-TinySwallow-1.5B", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2025-03-02 15:18:17", "latest_commit": "2025-03-04 15:24:22", "languages": [], "model_or_dataset": "model", "model_size": 1.54, "model_architectures": "Qwen2ForSequenceClassification", "multi_labels": [ "Information Extraction & Text Mining" ] }, { "description": "bert-base-japanese-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-base-japanese-luw-upos", "project_name": "bert-base-japanese-luw-upos", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2021-10-26 13:26:38", "latest_commit": "2022-09-18 19:43:18", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Representation Learning", "Syntactic Text Processing", "Information Retrieval", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "ELECTRA small Japanese discriminator This is an ELECTRA model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/electra-small-paper-japanese-discriminator", "project_name": "electra-small-paper-japanese-discriminator", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2021-10-04 13:48:17", "latest_commit": "2022-12-09 00:38:44", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForPreTraining", "multi_labels": [ "Language Models" ] }, { "description": "ELECTRA small Japanese finance discriminator This is an ELECTRA model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/electra-small-japanese-fin-discriminator", "project_name": "electra-small-japanese-fin-discriminator", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2021-10-04 14:06:48", "latest_commit": "2022-12-09 00:42:10", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForPreTraining", "multi_labels": [ "Language Models" ] }, { "description": "yacis-electra-small", "url": "https://huggingface.co/ptaszynski/yacis-electra-small-japanese", "project_name": "yacis-electra-small-japanese", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2022-01-12 01:48:13", "latest_commit": "2022-01-13 01:43:17", "languages": [], "model_or_dataset": "model", "model_size": null, 
"model_architectures": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "deberta-large-japanese-aozora-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-aozora-ud-head", "project_name": "deberta-large-japanese-aozora-ud-head", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2022-06-17 15:00:25", "latest_commit": "2023-03-04 20:17:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForQuestionAnswering", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Semantic Text Processing" ] }, { "description": "ESを書くAI Japanese GPT-2 modelをファインチューニングしました ファインチューニングには、あらゆる分野から140,000件ほどのESを用いました。 ", "url": "https://huggingface.co/huranokuma/es2", "project_name": "es2", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2022-08-09 08:20:00", "latest_commit": "2022-08-20 04:26:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "bart-base-japanese This model is converted from the original Japanese BART Pretrained model released by Kyoto University.", "url": "https://huggingface.co/Formzu/bart-base-japanese", "project_name": "bart-base-japanese", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2022-10-31 06:52:38", "latest_commit": "2022-11-07 11:13:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MBartForConditionalGeneration", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "roberta-base-japanese-aozora-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-base-japanese-aozora-ud-goeswith", "project_name": "roberta-base-japanese-aozora-ud-goeswith", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2022-10-15 04:01:29", "latest_commit": "2024-08-20 18:49:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "Japanese BERT-base (MeCab + BPE) How to load the tokenizer Please download the dictionary file for MeCab + BPE from our GitHub repository.", "url": "https://huggingface.co/hitachi-nlp/bert-base-japanese_mecab-bpe", "project_name": "bert-base-japanese_mecab-bpe", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-06-14 06:57:23", "latest_commit": "2023-06-16 01:00:52", "languages": [], "model_or_dataset": "model", "model_size": 0.109, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "こちらでアップロードできないので、civitaiにて先に公開しています。 ", "url": "https://huggingface.co/sazyou-roukaku/AfterRealXL", "project_name": "AfterRealXL", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-09-23 08:43:02", "latest_commit": "2023-10-01 18:12:09", "languages": [], "model_or_dataset": "model", 
"model_size": null, "model_architectures": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "transformers-ud-japanese-electra-ginza-520 (sudachitra-wordpiece, mC4 Japanese)", "url": "https://huggingface.co/megagonlabs/transformers-ud-japanese-electra-base-ginza-520", "project_name": "transformers-ud-japanese-electra-base-ginza-520", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-09-21 14:14:04", "latest_commit": "2023-09-21 17:45:45", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraModel", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese BERT-base (Juman++ + Unigram)", "url": "https://huggingface.co/hitachi-nlp/bert-base-japanese_jumanpp-unigram", "project_name": "bert-base-japanese_jumanpp-unigram", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-06-14 07:02:01", "latest_commit": "2023-06-16 01:02:48", "languages": [], "model_or_dataset": "model", "model_size": 0.109, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese BERT-base (Juman++ + BPE) How to load the tokenizer Please download the dictionary file for Juman++ + BPE from our GitHub repository.", "url": "https://huggingface.co/hitachi-nlp/bert-base-japanese_jumanpp-bpe", "project_name": "bert-base-japanese_jumanpp-bpe", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-06-14 07:01:02", "latest_commit": "2023-06-16 01:02:19", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "ku-accms/roberta-base-japanese-ssuw Model description This is a pre-trained Japanese RoBERTa base model for super short unit words (SSUW).", "url": "https://huggingface.co/ku-accms/roberta-base-japanese-ssuw", "project_name": "roberta-base-japanese-ssuw", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-04-11 14:05:02", "latest_commit": "2023-04-12 04:44:02", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "ELECTRA small Japanese discriminator for Irony", "url": "https://huggingface.co/kit-nlp/electra-small-japanese-discriminator-irony", "project_name": "electra-small-japanese-discriminator-irony", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2022-11-07 07:14:32", "latest_commit": "2022-11-08 04:11:04", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForSequenceClassification", "multi_labels": [ "Stylistic Analysis", "Sentiment Analysis" ] }, { "description": "YACIS ELECTRA Small Japanese for Irony", "url": "https://huggingface.co/kit-nlp/yacis-electra-small-japanese-irony", "project_name": "yacis-electra-small-japanese-irony", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2022-11-07 07:05:34", 
"latest_commit": "2022-11-08 04:16:30", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForSequenceClassification", "multi_labels": [ "Stylistic Analysis", "Sentiment Analysis" ] }, { "description": "deberta-base-japanese-wikipedia-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-wikipedia-ud-goeswith", "project_name": "deberta-base-japanese-wikipedia-ud-goeswith", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2022-09-18 06:02:55", "latest_commit": "2024-08-20 19:38:50", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "deberta-large-japanese-wikipedia-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-wikipedia-ud-head", "project_name": "deberta-large-japanese-wikipedia-ud-head", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2022-07-06 03:51:14", "latest_commit": "2024-08-20 19:51:21", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForQuestionAnswering", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Semantic Text Processing" ] }, { "description": "deberta-large-japanese-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-upos", "project_name": "deberta-large-japanese-upos", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2022-05-27 06:50:55", "latest_commit": "2024-07-26 16:00:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Information Retrieval", "Syntactic Parsing", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "deberta-base-japanese-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-upos", "project_name": "deberta-base-japanese-upos", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2022-05-24 08:12:05", "latest_commit": "2024-07-26 15:59:24", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Information Retrieval", "Syntactic Parsing", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "Google's mt5-base fine-tuned in Japanese to summarize patent claims in a limited Pharmaceutical domain. 
", "url": "https://huggingface.co/kz/mt5base-finetuned-patentsum-japanese-small", "project_name": "mt5base-finetuned-patentsum-japanese-small", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2021-04-10 00:31:15", "latest_commit": "2022-05-19 06:50:32", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MT5ForConditionalGeneration", "multi_labels": [ "Language Models", "Semantic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "jpn-msa source group: Japanese target group: Malay (macrolanguage) OPUS readme: jpn-msa model: transformer-align source language(s): jpn jpn_Hani jpn_Hira jpn_Kana target language(s): ind", "url": "https://huggingface.co/Helsinki-NLP/opus-mt-ja-ms", "project_name": "opus-mt-ja-ms", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2020-08-19 00:29:11", "latest_commit": "2023-08-16 11:59:16", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MarianMTModel", "multi_labels": [ "Multilinguality", "Language Models", "Semantic Text Processing" ] }, { "description": "日本語VL-T5事前学習済みモデル", "url": "https://huggingface.co/sonoisa/vl-t5-base-japanese", "project_name": "vl-t5-base-japanese", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2021-10-03 11:54:43", "latest_commit": "2021-10-04 11:13:35", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "VLT5ModelWrapper", "multi_labels": [ "Visual Data in NLP", "Text Generation", "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "roberta-base-japanese-aozora-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-base-japanese-aozora-ud-head", "project_name": "roberta-base-japanese-aozora-ud-head", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2022-06-21 05:21:38", "latest_commit": "2024-08-20 19:52:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForQuestionAnswering", "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Semantic Text Processing" ] }, { "description": "transformers-ud-japanese-electra-ginza (sudachitra-wordpiece, mC4 Japanese)", "url": "https://huggingface.co/megagonlabs/transformers-ud-japanese-electra-base-ginza", "project_name": "transformers-ud-japanese-electra-base-ginza", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2021-08-23 09:54:23", "latest_commit": "2021-09-22 11:00:17", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForPreTraining", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "ELECTRA small Japanese discriminator This is a ELECTRA model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/electra-small-japanese-discriminator", "project_name": "electra-small-japanese-discriminator", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2021-10-04 13:42:57", "latest_commit": "2022-12-09 00:41:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": 
"ElectraForPreTraining", "multi_labels": [ "Language Models" ] }, { "description": "nagisa_bert A BERT model for nagisa.", "url": "https://huggingface.co/taishi-i/nagisa_bert", "project_name": "nagisa_bert", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2022-09-25 13:12:57", "latest_commit": "2023-09-15 01:28:14", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForPreTraining", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "JAINU-Model (T5 fine-tuned model) JAINU is a Japanese - Ainu language machine translation model. ", "url": "https://huggingface.co/astremo/JAINU", "project_name": "JAINU", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2022-04-30 13:57:31", "latest_commit": "2022-05-22 05:51:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration", "multi_labels": [ "Multilinguality", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese_Fined_Tuned_Whisper_Model", "url": "https://huggingface.co/NadiaHolmlund/Japanese_Fine_Tuned_Whisper_Model", "project_name": "Japanese_Fine_Tuned_Whisper_Model", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-03-12 13:28:06", "latest_commit": "2023-03-13 09:19:16", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [ "Language Models", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "AIgroup-CVM-utokyohospital/Llama-2-70b-chat-4bit-japanese This model is Llama-2-Chat 70B fine-tuned with a part of the Japanese instruction dataset named izumi-lab/llm-japanese-dataset.", "url": "https://huggingface.co/AIgroup-CVM-utokyohospital/Llama-2-70b-chat-4bit-japanese", "project_name": "Llama-2-70b-chat-4bit-japanese", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-08-10 06:15:19", "latest_commit": "2023-10-01 08:41:21", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [] }, { "description": "Fine-tuned XLSR-53 large model for speech recognition in Japanese Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the train and validation splits of Common Voice 6.1, CSS10 and JSUT.", "url": "https://huggingface.co/Gustav114514/work", "project_name": "work", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-01-11 05:39:33", "latest_commit": "2024-01-11 05:52:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ "Representation Learning", "Text Generation", "Language Models", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "The English document is here. 
", "url": "https://huggingface.co/watashiha/Watashiha-Llama-2-13B-Ogiri-sft", "project_name": "Watashiha-Llama-2-13B-Ogiri-sft", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-01-19 06:59:08", "latest_commit": "2024-03-04 05:24:31", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ベースモデル:cl-tohoku/bert-base-japanese-whole-word-masking データセット:llm-book/wrime-sentiment オプティマイザ: adamw Optunaでハイパーパラメータ探索 学習率スケジュールのタイプ(lr_scheduler_type):", "url": "https://huggingface.co/A-Funakoshi/bert-base-japanese-v3-wrime-v2", "project_name": "bert-base-japanese-v3-wrime-v2", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-10-27 12:05:45", "latest_commit": "2023-10-27 12:16:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing", "Sentiment Analysis" ] }, { "description": "Japanese BERT-base (Vaporetto + WordPiece) How to load the tokenizer Please download the dictionary file for Vaporetto + WordPiece from our GitHub repository.", "url": "https://huggingface.co/hitachi-nlp/bert-base-japanese_vaporetto-wordpiece", "project_name": "bert-base-japanese_vaporetto-wordpiece", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-06-14 07:19:46", "latest_commit": "2023-06-16 01:06:17", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "Aerner LM-v1 事前学習から全部日本語で学習させたモデルです。 ", "url": "https://huggingface.co/aerner/lm-v1", "project_name": "lm-v1", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-05-25 12:35:32", "latest_commit": "2023-05-25 13:35:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Text Generation", "Language Models" ] }, { "description": "調整したい方向とは少しずれたが、AIの口調は女の子風に。 ", "url": "https://huggingface.co/Akimite/Gemma2-9b-it-Girl-v1", "project_name": "Gemma2-9b-it-Girl-v1", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2025-01-24 08:42:28", "latest_commit": "2025-01-24 09:28:48", "languages": [], "model_or_dataset": "model", "model_size": 9.24, "model_architectures": "Gemma2ForCausalLM", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese-LLaMA-2-7B Japanese-LLaMA-2-7Bは基盤モデル、フルモデルです。 ", "url": "https://huggingface.co/owner203/japanese-llama-2-7b", "project_name": "japanese-llama-2-7b", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-01-22 01:45:06", "latest_commit": "2024-06-05 02:24:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "お知らせ 
A model trained to give more appropriate answers, https://huggingface.co/hotchpotch/youri-7b-stf-qa-context-jaqket-jsquad-gptq , is also available. ", "url": "https://huggingface.co/hotchpotch/youri-7b-sft-qa-context-jaqket-gptq", "project_name": "youri-7b-sft-qa-context-jaqket-gptq", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-12-08 03:51:28", "latest_commit": "2024-02-25 06:40:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Natural Language Interfaces", "Language Models", "Semantic Text Processing" ] }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/sosoai/Orion-14B-Chat-RAG-safetensors", "project_name": "Orion-14B-Chat-RAG-safetensors", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-01-25 00:05:06", "latest_commit": "2024-01-25 02:09:15", "languages": [], "model_or_dataset": "model", "model_size": 14.5, "model_architectures": "OrionForCausalLM", "multi_labels": [ "Multilinguality", "Language Models" ] }, { "description": "A Japanese bert-vits2 model created by training on speaker F2 of the jvnv corpus. ", "url": "https://huggingface.co/yasyune/bert_vits2_2.2_jvnv", "project_name": "bert_vits2_2.2_jvnv", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-12-13 07:52:26", "latest_commit": "2023-12-13 07:54:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese BERT-base (Nothing + WordPiece) How to load the tokenizer Please download the dictionary file for Nothing + WordPiece from our GitHub repository.", "url": "https://huggingface.co/hitachi-nlp/bert-base-japanese_nothing-wordpiece", "project_name": "bert-base-japanese_nothing-wordpiece", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-06-14 08:08:06", "latest_commit": "2023-06-16 01:07:33", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM", "multi_labels": [ "Responsible & Trustworthy NLP", "Syntactic Text Processing", "Text Segmentation", "Language Models", "Semantic Text Processing" ] }, { "description": "This model is luke-japanese-base fine-tuned for use on JCommonsenseQA (multiple-choice question answering). ", "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-base-commonsenseqa", "project_name": "luke-japanese-base-commonsenseqa", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-01-29 15:40:46", "latest_commit": "2023-05-26 15:04:27", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeForMultipleChoice", "multi_labels": [ "Reasoning", "Language Models", "Commonsense Reasoning", "Semantic Text Processing" ] }, { "description": "Whisper Small JA - Lorenzo Concina", "url": "https://huggingface.co/lorenzoncina/whisper-small-ja", "project_name": "whisper-small-ja", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-01-06 10:46:43", "latest_commit": "2023-01-09 22:17:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [] }, { "description": "electra-base-japanese-discriminator (sudachitra-wordpiece, mC4
Japanese) -", "url": "https://huggingface.co/hiroshi-matsuda-rit/electra-base-japanese-discriminator-v2", "project_name": "electra-base-japanese-discriminator-v2", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-05-07 08:14:12", "latest_commit": "2023-05-07 17:41:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForPreTraining", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models" ] }, { "description": "This model is traned with guanaco dataset.", "url": "https://huggingface.co/ganchengguang/Yoko-7B-Japanese-v1", "project_name": "Yoko-7B-Japanese-v1", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-08-10 13:01:38", "latest_commit": "2023-08-10 13:11:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [] }, { "description": "Wav2Vec2-XLS-R-300M-Japanese-Hiragana Fine-tuned facebook/wav2vec2-xls-r-300m on Japanese Hiragana characters using the Common Voice and JSUT.", "url": "https://huggingface.co/slplab/wav2vec2-xls-r-300m-japanese-hiragana", "project_name": "wav2vec2-xls-r-300m-japanese-hiragana", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2022-09-16 07:34:58", "latest_commit": "2022-09-16 11:01:54", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC", "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "MambaSan-370m-instruct 🐍 MambaSan-instruct is the first chat Japanese language model based on a state-space model architecture (Mamba).", "url": "https://huggingface.co/loiccabannes/MambaSan-370m-instruct", "project_name": "MambaSan-370m-instruct", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-02-11 22:40:33", "latest_commit": "2024-02-11 22:47:04", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Syntactic Text Processing", "Language Models" ] }, { "description": "Japanese Stable LM Instruct Gamma 7B Model Description", "url": "https://huggingface.co/LoneStriker/stabilityai_japanese-stablelm-instruct-gamma-7b-3.0bpw-h6-exl2", "project_name": "stabilityai_japanese-stablelm-instruct-gamma-7b-3.0bpw-h6-exl2", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-10-28 20:16:15", "latest_commit": "2023-10-28 15:16:25", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Responsible & Trustworthy NLP", "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "ESPnet2 TTS pretrained model kan-bayashi/jsut_full_band_vits_prosody ♻", "url": "https://huggingface.co/espnet/kan-bayashi_jsut_full_band_vits_prosody", "project_name": "kan-bayashi_jsut_full_band_vits_prosody", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2021-10-23 20:47:11", "latest_commit": "2021-10-23 16:47:17", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Language Models" ] }, { "description": "Japanese Stable LM Instruct Gamma 7B Model 
Description", "url": "https://huggingface.co/LoneStriker/stabilityai_japanese-stablelm-instruct-gamma-7b-8.0bpw-h6-exl2", "project_name": "stabilityai_japanese-stablelm-instruct-gamma-7b-8.0bpw-h6-exl2", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-10-28 20:43:59", "latest_commit": "2023-10-28 15:44:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Responsible & Trustworthy NLP", "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese Stable LM Instruct Gamma 7B Model Description", "url": "https://huggingface.co/LoneStriker/stabilityai_japanese-stablelm-instruct-gamma-7b-6.0bpw-h6-exl2", "project_name": "stabilityai_japanese-stablelm-instruct-gamma-7b-6.0bpw-h6-exl2", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-10-28 20:36:54", "latest_commit": "2023-10-28 15:37:11", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Responsible & Trustworthy NLP", "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese Stable LM Instruct Gamma 7B Model Description", "url": "https://huggingface.co/LoneStriker/stabilityai_japanese-stablelm-instruct-gamma-7b-5.0bpw-h6-exl2", "project_name": "stabilityai_japanese-stablelm-instruct-gamma-7b-5.0bpw-h6-exl2", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-10-28 20:29:59", "latest_commit": "2023-10-28 15:30:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Responsible & Trustworthy NLP", "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "tiny_mixtral_jaをinstruction用のデータセットでtrainingしたものですhttps://huggingface.co/if001/tiny_mixtral_ja", "url": "https://huggingface.co/if001/tiny_mixtral_ja_instruction", "project_name": "tiny_mixtral_ja_instruction", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-01-31 11:28:29", "latest_commit": "2024-02-02 21:06:13", "languages": [], "model_or_dataset": "model", "model_size": 0.276, "model_architectures": "MixtralForCausalLM", "multi_labels": [] }, { "description": "Japanese Stable LM Instruct Gamma 7B Model Description", "url": "https://huggingface.co/LoneStriker/stabilityai_japanese-stablelm-instruct-gamma-7b-4.0bpw-h6-exl2", "project_name": "stabilityai_japanese-stablelm-instruct-gamma-7b-4.0bpw-h6-exl2", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-10-28 20:23:03", "latest_commit": "2023-10-28 15:23:16", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Responsible & Trustworthy NLP", "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "SambaLingo-Japanese-Chat SambaLingo-Japanese-Chat is a human aligned chat model trained in Japanese and English.", "url": "https://huggingface.co/LoneStriker/SambaLingo-Japanese-Chat-6.0bpw-h6-exl2", "project_name": "SambaLingo-Japanese-Chat-6.0bpw-h6-exl2", "downloads": 11, "source": "Hugging Face", "score": 
-0.052026864340564666, "first_commit": "2024-03-07 06:55:28", "latest_commit": "2024-03-07 06:57:49", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "https://huggingface.co/kotoba-tech/kotoba-whisper-v1.1 The above model, further trained so that it can recognize adult terms.", "url": "https://huggingface.co/swdq/Visual-novel-whisper", "project_name": "Visual-novel-whisper", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-07-24 10:09:29", "latest_commit": "2024-07-24 10:29:47", "languages": [], "model_or_dataset": "model", "model_size": 0.756, "model_architectures": "WhisperForConditionalGeneration", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Pretrained LM beomi/Llama-3-Open-Ko-8B (MIT License)", "url": "https://huggingface.co/traintogpb/llama-3-mmt-xml-it-sft-adapter", "project_name": "llama-3-mmt-xml-it-sft-adapter", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-09-27 04:41:59", "latest_commit": "2024-11-04 07:38:18", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ELYZA-japanese-Llama-2-7b Model Description ELYZA-japanese-Llama-2-7b is a model based on Llama 2 with additional pretraining to extend its Japanese language capabilities. ", "url": "https://huggingface.co/furnqse/elyza-fork2", "project_name": "elyza-fork2", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-04-13 08:34:05", "latest_commit": "2024-04-13 08:34:47", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "paper: Generating chit-chat responses imbued with character-likeness using reinforcement learning", "url": "https://huggingface.co/tealgreen0503/japanese-gpt2-medium-ppo-araisan", "project_name": "japanese-gpt2-medium-ppo-araisan", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-04-29 09:24:29", "latest_commit": "2024-04-29 09:41:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2HeadWithValueModel", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "SambaLingo-Japanese-Chat SambaLingo-Japanese-Chat is a human-aligned chat model trained in Japanese and English.", "url": "https://huggingface.co/LoneStriker/SambaLingo-Japanese-Chat-3.0bpw-h6-exl2", "project_name": "SambaLingo-Japanese-Chat-3.0bpw-h6-exl2", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-03-07 06:50:19", "latest_commit": "2024-03-07 06:51:42", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Model explanation: YaguruMagiku 0.6 : AbyssOrangeMix2_sfw 0.4. The merge sources are rumored to include the NAI leak, so this is not recommended for those opposed to the NAI leak. YaguruMagiku, which can produce the ideal black-haired-ponytail face, was mixed with AbyssOrangeMix2, whose faces are fairly similar and easier to control. ", "url": "https://huggingface.co/ThePioneer/MoeDiffusion", "project_name": "MoeDiffusion", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-01-18 11:14:31", "latest_commit": "2023-01-21 02:10:41",
"languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Example ESPnet2 TTS model kan-bayashi/jsut_transformer_accent ♻", "url": "https://huggingface.co/espnet/kan-bayashi_jsut_transformer_accent", "project_name": "kan-bayashi_jsut_transformer_accent", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2021-07-03 14:51:41", "latest_commit": "2021-07-03 10:51:43", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Example ESPnet2 TTS model kan-bayashi/jsut_tacotron2_accent ♻", "url": "https://huggingface.co/espnet/kan-bayashi_jsut_tacotron2_accent", "project_name": "kan-bayashi_jsut_tacotron2_accent", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2021-07-03 14:51:34", "latest_commit": "2021-07-03 10:51:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Example ESPnet2 TTS model kan-bayashi/jsut_fastspeech ♻", "url": "https://huggingface.co/espnet/kan-bayashi_jsut_fastspeech", "project_name": "kan-bayashi_jsut_fastspeech", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2021-07-03 14:44:06", "latest_commit": "2021-07-03 10:44:10", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "HuggingFaceFW/fineweb-edu-classifierを再現するために、日本語データでpkshatech/GLuCoSE-base-jaを学習したモデルです。 ", "url": "https://huggingface.co/Kendamarron/fineweb-edu-classifier-ja", "project_name": "fineweb-edu-classifier-ja", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-06-10 13:17:55", "latest_commit": "2024-06-14 13:28:45", "languages": [], "model_or_dataset": "model", "model_size": 0.133, "model_architectures": "LukeForSequenceClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "KoichiYasuoka/karasu-1.1B-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/karasu-1.1B-upos", "project_name": "karasu-1.1B-upos", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-08-30 04:04:28", "latest_commit": "2024-08-30 13:13:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "rinna-gpt2-small-japanese-ud-causal Model Description", "url": "https://huggingface.co/KoichiYasuoka/rinna-gpt2-small-japanese-ud-causal", "project_name": "rinna-gpt2-small-japanese-ud-causal", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-09-07 07:49:03", "latest_commit": "2024-09-12 22:27:04", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "goldfish-gpt2-japanese-10mb-ud-causal Model Description", "url": 
"https://huggingface.co/KoichiYasuoka/goldfish-gpt2-japanese-10mb-ud-causal", "project_name": "goldfish-gpt2-japanese-10mb-ud-causal", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-09-09 03:38:16", "latest_commit": "2024-09-12 22:39:58", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Tanuki-8B-dpo-v1.0-4k-GPTQ-8bit 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8B-dpo-v1.0-4kのGPTQ 8bit量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8B-dpo-v1.0-4k-GPTQ-8bit", "project_name": "Tanuki-8B-dpo-v1.0-4k-GPTQ-8bit", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-08-27 18:32:19", "latest_commit": "2024-09-03 09:27:46", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "gpt2-medium-japanese-unidic-ud-causal Model Description", "url": "https://huggingface.co/KoichiYasuoka/gpt2-medium-japanese-unidic-ud-causal", "project_name": "gpt2-medium-japanese-unidic-ud-causal", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-08-30 13:44:48", "latest_commit": "2024-08-30 22:48:46", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "gpt2-small-japanese-juman-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/gpt2-small-japanese-juman-upos", "project_name": "gpt2-small-japanese-juman-upos", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-08-30 10:23:29", "latest_commit": "2024-09-12 22:49:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "calm3-22b-RP-v0.1 cyberagent/calm3-22b-chatをベースにロールプレイ用にQLoRAでファインチューニングしたモデルです。 ", "url": "https://huggingface.co/Aratako/calm3-22b-RP-v0.1", "project_name": "calm3-22b-RP-v0.1", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-08-19 11:50:17", "latest_commit": "2024-08-21 10:51:24", "languages": [], "model_or_dataset": "model", "model_size": 22.5, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "NikolayKozloff/h2o-Llama-3-8B-Japanese-Instruct-Q8_0-GGUF", "url": "https://huggingface.co/NikolayKozloff/h2o-Llama-3-8B-Japanese-Instruct-Q8_0-GGUF", "project_name": "h2o-Llama-3-8B-Japanese-Instruct-Q8_0-GGUF", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-06-24 13:27:54", "latest_commit": "2024-06-24 13:28:33", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "This repository contains a model trained (QLoRA-SFT)", "url": "https://huggingface.co/taoki/phi3-mini-4k-qlora-jmultiwoz-dolly-amenokaku-alpaca_jp_python-GGUF", "project_name": 
"phi3-mini-4k-qlora-jmultiwoz-dolly-amenokaku-alpaca_jp_python-GGUF", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-05-29 15:11:10", "latest_commit": "2024-05-31 11:28:45", "languages": [], "model_or_dataset": "model", "model_size": 3.82, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "概要 「LOCAL AI HACKATHON」における、チームDataPilot,4つめの成果品です。", "url": "https://huggingface.co/DataPilot/ArrowSmartPlus_3.6B_instruction", "project_name": "ArrowSmartPlus_3.6B_instruction", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-03-27 08:00:28", "latest_commit": "2024-03-27 08:26:56", "languages": [], "model_or_dataset": "model", "model_size": 3.56, "model_architectures": "GPTNeoXForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "TigerBot-7B Japanese", "url": "https://huggingface.co/atsuki-yamaguchi/tigerbot-7b-base-random-ja", "project_name": "tigerbot-7b-base-random-ja", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-04-21 16:43:53", "latest_commit": "2024-04-22 09:05:05", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": "LlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "gpt2-medium-japanese-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/gpt2-medium-japanese-upos", "project_name": "gpt2-medium-japanese-upos", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-06-22 22:39:14", "latest_commit": "2024-07-27 07:49:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "モデルの説明(English explanation is below.", "url": "https://huggingface.co/keitokei1994/Llama-3-8B-shisa-2x8B", "project_name": "Llama-3-8B-shisa-2x8B", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-05-24 18:58:48", "latest_commit": "2024-06-11 07:41:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MixtralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Chatvector-llava-v1.5-plus-Houou-v3-7b Model Card Model Details ※好奇心から生まれたモデルです。", "url": "https://huggingface.co/shinyice/chatvector-llava-v1.5-plus-houou-v3-7b", "project_name": "chatvector-llava-v1.5-plus-houou-v3-7b", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-06-04 04:24:06", "latest_commit": "2024-06-04 05:12:10", "languages": [], "model_or_dataset": "model", "model_size": 7.06, "model_architectures": "LlavaLlamaForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "概要 GLM-4-9B-Chatを、日本語のWikiデータを選定し、追加学習した日本語に非常に強いスコアを出したモデルです。 ", "url": "https://huggingface.co/HODACHI/glm-4-9b-chat-FT-ja-v0.3", "project_name": "glm-4-9b-chat-FT-ja-v0.3", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-06-07 03:33:33", "latest_commit": "2024-06-09 23:37:02", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ChatGLMForConditionalGeneration", "multi_labels": [ 
"Annotation and Dataset Development" ] }, { "description": "RP-7b-instruct 🚨 This model is tuning to RP and knowledge is likely unstable. ", "url": "https://huggingface.co/nitky/RP-7b-instruct", "project_name": "RP-7b-instruct", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-06-01 10:47:22", "latest_commit": "2024-06-01 14:40:25", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MistralForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Akimite/Qwen2-7b-Instruct-Boku-v2のマイナーチェンジ版です。 ", "url": "https://huggingface.co/Akimite/Qwen2-7b-Instruct-Boku-v3", "project_name": "Qwen2-7b-Instruct-Boku-v3", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-06-13 05:27:28", "latest_commit": "2024-06-15 14:21:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Qwen2ForCausalLM", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Example ESPnet2 TTS model kan-bayashi/jsut_tts_train_fastspeech2_transformer_teacher_raw_phn_jaconv_pyopenjtalk_accent_with_pause_train.loss.ave ♻", "url": "https://huggingface.co/espnet/kan-bayashi_jsut_tts_train_fastspeech2_transformer_teacher_raw_phn_jac-truncated-60fc24", "project_name": "kan-bayashi_jsut_tts_train_fastspeech2_transformer_teacher_raw_phn_jac-truncated-60fc24", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2021-07-03 14:53:30", "latest_commit": "2021-07-03 11:03:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Ja-miracl This dataset represents a conversion of the Japanese (Ja) section from the miracl dataset into the BeIR format, making it compatible for use with mteb.", "url": "https://huggingface.co/datasets/seungwon929/Ja-miracl", "project_name": "Ja-miracl", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-05-07 04:06:27", "latest_commit": "2024-05-07 05:17:43", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Magpie方式によるprompt抽出をrinna/llama-3-youko-8bで行ってみました。 ", "url": "https://huggingface.co/datasets/kurogane/Magpie_llama-3-youko-8b_prompt_extract_example50", "project_name": "Magpie_llama-3-youko-8b_prompt_extract_example50", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-06-18 12:45:15", "latest_commit": "2024-06-18 12:47:01", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Language Models", "Low-Resource NLP" ] }, { "description": "読み込み方 from datasets import load_dataset dataset = load_dataset(\"YANS-official/senryu-marusen\", split=\"train\") 概要 月に1万句以上の投稿がある国内最大級の川柳投稿サイト『川柳投稿まるせん』のクロールデータです。", "url": "https://huggingface.co/datasets/YANS-official/senryu-marusen", "project_name": "senryu-marusen", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-08-28 18:49:03", "latest_commit": "2024-08-30 11:41:46", "languages": [], "model_or_dataset": "dataset", "model_size": null, 
"model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "データ生成を行う際のSEEDデータには有志の方々が作成したseed_tasks_japanese.jsonlを利用させていただきました。", "url": "https://huggingface.co/datasets/toshi456/Rakuten-Alpaca-Data-32K", "project_name": "Rakuten-Alpaca-Data-32K", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-04-01 13:54:16", "latest_commit": "2024-04-01 14:09:35", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "This dataset contains passages, each of which consists of consecutive sentences no longer than 400 characters from Japanese Wikipedia as of 2022-04-04.", "url": "https://huggingface.co/datasets/llm-book/jawiki-20220404-c400", "project_name": "jawiki-20220404-c400", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2023-05-05 07:34:52", "latest_commit": "2023-10-25 15:26:19", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Information Retrieval", "Annotation and Dataset Development" ] }, { "description": "弹丸论破的七海千秋语音数据", "url": "https://huggingface.co/datasets/lissette/Nanami-Chiaki-audio", "project_name": "Nanami-Chiaki-audio", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": "2024-06-04 06:37:48", "latest_commit": "2024-06-04 09:48:41", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "以下の条件に同意したうえで、公開されたモデル及びデータセット等(以下「本コンテンツ」)といいます)をダウンロードします。 ", "url": "https://huggingface.co/datasets/weblab-GENIAC/jhellaswag", "project_name": "jhellaswag", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "CC-MAIN-2019-49へようこそ 本データセットはCommonCrawlerと呼ばれるものから日本語のみを抽出したものです。 ", "url": "https://huggingface.co/datasets/cc-clean/CC-MAIN-2019-49", "project_name": "CC-MAIN-2019-49", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "language: jp en tags: translation license: cc-by-4.0", "url": "https://huggingface.co/datasets/ltvmoon/opusbook_ja_en", "project_name": "opusbook_ja_en", "downloads": 11, "source": "Hugging Face", "score": -0.052026864340564666, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Multilinguality", "Syntactic Text Processing", "Text Generation", "Machine Translation", "Tagging", "Annotation and Dataset Development" ] }, { "description": "common voice, google fleurs, JSUTv1.1, JAS_v2 (joujiboi/japanese-anime-speech-v2)", "url": "https://huggingface.co/datasets/sin2piusc/jgca_v2_50k_2", "project_name": "jgca_v2_50k_2", "downloads": 11, "source": "Hugging Face", "score": 
-0.052026864340564666, "first_commit": "2024-07-08 18:06:27", "latest_commit": "2024-07-24 18:58:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "natto-py combines the Python programming language with MeCab, the part-of-speech and morphological analyzer for the Japanese language.", "url": "https://github.com/buruzaemon/natto-py", "project_name": "natto-py", "stargazers_count": 93, "source": "GitHub", "score": -0.05434702094382028, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "Dictionary-based Sentiment Analysis for Japanese", "url": "https://github.com/ikegami-yukino/oseti", "project_name": "oseti", "stargazers_count": 93, "source": "GitHub", "score": -0.05434702094382028, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Sentiment Analysis", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Dictionary-related data from the kanji database", "url": "https://github.com/cjkvi/cjkvi-dict", "project_name": "cjkvi-dict", "stargazers_count": 93, "source": "GitHub", "score": -0.05434702094382028, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Corpus of Annual Reports in Japan", "url": "https://github.com/chakki-works/CoARiJ", "project_name": "CoARiJ", "stargazers_count": 92, "source": "GitHub", "score": -0.05717921266543762, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "NMeCab: Japanese morphological analyzer on .NET", "url": "https://github.com/komutan/NMeCab", "project_name": "NMeCab", "stargazers_count": 92, "source": "GitHub", "score": -0.05717921266543762, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "Unidic packaged for installation via pip.", "url": "https://github.com/polm/unidic-py", "project_name": "unidic-py", "stargazers_count": 90, "source": "GitHub", "score": -0.06284359610867232, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "A Japanese-translated version of the alpaca dataset", "url": "https://github.com/shi3z/alpaca_ja", "project_name": "alpaca_ja", "stargazers_count": 90, "source": "GitHub", "score": -0.06284359610867232, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "A Python Module for JUMAN++/KNP", "url": "https://github.com/ku-nlp/pyknp", "project_name": "pyknp", "stargazers_count": 89, "source": "GitHub", "score": -0.06567578783028967, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Tagging", "Morphology" ] }, { "description": "Python library for CJK (Chinese, Japanese, and Korean) language dictionary", "url": "https://github.com/cihai/cihai", "project_name": "cihai", "stargazers_count": 89,
"source": "GitHub", "score": -0.06567578783028967, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Syntactic Text Processing", "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Pytorch implementation and pre-trained Japanese model for CANINE, the efficient character-level transformer.", "url": "https://github.com/octanove/shiba", "project_name": "shiba", "stargazers_count": 89, "source": "GitHub", "score": -0.06567578783028967, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Responsible & Trustworthy NLP", "Representation Learning", "Syntactic Text Processing", "Language Models", "Green & Sustainable NLP", "Semantic Text Processing" ] }, { "description": "日英変換・英語略語展開のための IME 追加辞書 orange_book 日本語から英語への和英変換や英語略語の展開を Google 日本語入力や ATOK などで可能にする IME 拡張辞書", "url": "https://github.com/peaceiris/google-ime-dictionary", "project_name": "google-ime-dictionary", "stargazers_count": 89, "source": "GitHub", "score": -0.06567578783028967, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "STAIR captions: large-scale Japanese image caption dataset", "url": "https://github.com/STAIR-Lab-CIT/STAIR-captions", "project_name": "STAIR-captions", "stargazers_count": 89, "source": "GitHub", "score": -0.06567578783028967, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Visual Data in NLP", "Captioning", "Text Generation", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "GUIで動作する文書校正ツール GUI tool for textlinting.", "url": "https://github.com/gecko655/proofreading-tool", "project_name": "proofreading-tool", "stargazers_count": 86, "source": "GitHub", "score": -0.0741723629951417, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Error Correction" ] }, { "description": "Word2vec (word to vectors) approach for Japanese language using Gensim and Mecab.", "url": "https://github.com/philipperemy/japanese-words-to-vectors", "project_name": "japanese-words-to-vectors", "stargazers_count": 86, "source": "GitHub", "score": -0.0741723629951417, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "Neologism dictionary based on the language resources on the Web for mecab-unidic", "url": "https://github.com/neologd/mecab-unidic-neologd", "project_name": "mecab-unidic-neologd", "stargazers_count": 86, "source": "GitHub", "score": -0.0741723629951417, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese SKK input method library", "url": "https://github.com/ueno/libskk", "project_name": "libskk", "stargazers_count": 85, "source": "GitHub", "score": -0.07700455471675904, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "get japanese manga from url to translate manga image", "url": 
"https://github.com/ttop32/JMTrans", "project_name": "JMTrans", "stargazers_count": 84, "source": "GitHub", "score": -0.07983674643837639, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Visual Data in NLP", "Text Generation", "Machine Translation", "Multimodality" ] }, { "description": "A large parallel corpus of English and Japanese", "url": "https://github.com/rpryzant/JESC", "project_name": "JESC", "stargazers_count": 84, "source": "GitHub", "score": -0.07983674643837639, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "External dictionary importer for Yomichan.", "url": "https://github.com/FooSoft/yomichan-import", "project_name": "yomichan-import", "stargazers_count": 83, "source": "GitHub", "score": -0.08266893815999374, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "databricks/dolly-v2-12b の学習データに使用されたdatabricks-dolly-15k.jsonl を日本語に翻訳したデータセットになります。", "url": "https://github.com/kunishou/databricks-dolly-15k-ja", "project_name": "databricks-dolly-15k-ja", "stargazers_count": 83, "source": "GitHub", "score": -0.08266893815999374, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [] }, { "description": "日本語文字変換ライブラリ (javascript)", "url": "https://github.com/kazuhikoarase/jaconv", "project_name": "jaconv", "stargazers_count": 82, "source": "GitHub", "score": -0.08550112988161108, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "Kyoto University Web Document Leads Corpus", "url": "https://github.com/ku-nlp/KWDLC", "project_name": "KWDLC", "stargazers_count": 82, "source": "GitHub", "score": -0.08550112988161108, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Annotation and Dataset Development" ] }, { "description": "「言語処理100本ノック 2025」をPythonで解く", "url": "https://github.com/upura/nlp100v2025", "project_name": "nlp100v2025", "stargazers_count": 82, "source": "GitHub", "score": -0.08550112988161108, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "Project of llm evaluation to Japanese tasks", "url": "https://github.com/wandb/llm-leaderboard", "project_name": "llm-leaderboard", "stargazers_count": 81, "source": "GitHub", "score": -0.08833332160322843, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "LLM構築用の日本語チャットデータセット", "url": "https://github.com/masanorihirano/llm-japanese-dataset", "project_name": "llm-japanese-dataset", "stargazers_count": 81, "source": "GitHub", "score": -0.08833332160322843, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [] }, { "description": "Japanese tokenizer for Transformers", "url": "https://github.com/WorksApplications/SudachiTra", "project_name": "SudachiTra", "stargazers_count": 80, "source": "GitHub", "score": -0.09116551332484578, "first_commit": null, "latest_commit": null, "languages": [], 
"model_or_dataset": "model", "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Tagging", "Semantic Text Processing", "Morphology" ] }, { "description": "Japanese Morphological Analysis written in Rust", "url": "https://github.com/Leko/goya", "project_name": "goya", "stargazers_count": 79, "source": "GitHub", "score": -0.09399770504646313, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Tagging", "Morphology" ] }, { "description": "Japanese Dictionary", "url": "https://github.com/gojp/nihongo", "project_name": "nihongo", "stargazers_count": 79, "source": "GitHub", "score": -0.09399770504646313, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "UNICODE絵文字の日本語読み/キーワード/分類辞書", "url": "https://github.com/yagays/emoji-ja", "project_name": "emoji-ja", "stargazers_count": 79, "source": "GitHub", "score": -0.09399770504646313, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Morphology", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "text-only archives of www.aozora.gr.jp", "url": "https://github.com/aozorahack/aozorabunko_text", "project_name": "aozorabunko_text", "stargazers_count": 78, "source": "GitHub", "score": -0.09682989676808047, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "SKK (Simple Kana Kanji henkan) library", "url": "https://github.com/naokiri/cskk", "project_name": "cskk", "stargazers_count": 77, "source": "GitHub", "score": -0.09966208848969782, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "and Romaji", "url": "https://github.com/PSeitz/wana_kana_rust", "project_name": "wana_kana_rust", "stargazers_count": 76, "source": "GitHub", "score": -0.10249428021131517, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Utility scripts for preprocessing Wikipedia texts for NLP", "url": "https://github.com/singletongue/wikipedia-utils", "project_name": "wikipedia-utils", "stargazers_count": 76, "source": "GitHub", "score": -0.10249428021131517, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Text Normalization", "Morphology" ] }, { "description": "Japanese Language Model Financial Evaluation Harness", "url": "https://github.com/pfnet-research/japanese-lm-fin-harness", "project_name": "japanese-lm-fin-harness", "stargazers_count": 75, "source": "GitHub", "score": -0.1053264719329325, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese Realistic Textual Entailment Corpus (NLP 2020, LREC 2020)", "url": "https://github.com/megagonlabs/jrte-corpus", "project_name": "jrte-corpus", "stargazers_count": 75, "source": "GitHub", "score": -0.1053264719329325, "first_commit": null, 
"latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Reasoning", "Textual Inference", "Language Models", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "「言語処理100本ノック 2020」をPythonで解く", "url": "https://github.com/upura/nlp100v2020", "project_name": "nlp100v2020", "stargazers_count": 75, "source": "GitHub", "score": -0.1053264719329325, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "Pure Python Japanese address geocoder", "url": "https://github.com/t-sagara/jageocoder", "project_name": "jageocoder", "stargazers_count": 73, "source": "GitHub", "score": -0.1109908553761672, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "A PyTorch Implementation of japanese chatbot using BERT and Transformer's decoder", "url": "https://github.com/reppy4620/Dialog", "project_name": "Dialog", "stargazers_count": 73, "source": "GitHub", "score": -0.1109908553761672, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents", "Language Models", "Semantic Text Processing" ] }, { "description": "Laboro BERT Japanese: Japanese BERT Pre-Trained With Web-Corpus", "url": "https://github.com/laboroai/Laboro-BERT-Japanese", "project_name": "Laboro-BERT-Japanese", "stargazers_count": 73, "source": "GitHub", "score": -0.1109908553761672, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "ひらがなIME for IBus", "url": "https://github.com/esrille/ibus-hiragana", "project_name": "ibus-hiragana", "stargazers_count": 72, "source": "GitHub", "score": -0.11382304709778454, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese CLIP by rinna Co., Ltd.", "url": "https://github.com/rinnakk/japanese-clip", "project_name": "japanese-clip", "stargazers_count": 72, "source": "GitHub", "score": -0.11382304709778454, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "This repo contains a list of the 44,998 most common Japanese words in order of frequency, as determined by the University of Leeds Corpus.", "url": "https://github.com/hingston/japanese", "project_name": "japanese", "stargazers_count": 72, "source": "GitHub", "score": -0.11382304709778454, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Information Extraction & Text Mining" ] }, { "description": "TinySegmenter用の学習モデルを自作するためのツール.", "url": "https://github.com/shogo82148/TinySegmenterMaker", "project_name": "TinySegmenterMaker", "stargazers_count": 71, "source": "GitHub", "score": -0.11665523881940189, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "5chの過去ログをスクレイピングして、過去流行った単語(ex, 香具師, orz)などを追跡調査", "url": 
"https://github.com/GINK03/5ch-analysis", "project_name": "5ch-analysis", "stargazers_count": 71, "source": "GitHub", "score": -0.11665523881940189, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Resembla: Word-based Japanese similar sentence search library", "url": "https://github.com/tuem/resembla", "project_name": "resembla", "stargazers_count": 71, "source": "GitHub", "score": -0.11665523881940189, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Semantic Similarity", "Semantic Text Processing" ] }, { "description": "ニコニコ大百科とピクシブ百科事典の共通部分のIME辞書", "url": "https://github.com/ncaq/dic-nico-intersection-pixiv", "project_name": "dic-nico-intersection-pixiv", "stargazers_count": 71, "source": "GitHub", "score": -0.11665523881940189, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [] }, { "description": "japanese sentence segmentation library for python", "url": "https://github.com/wwwcojp/ja_sentence_segmenter", "project_name": "ja_sentence_segmenter", "stargazers_count": 70, "source": "GitHub", "score": -0.11948743054101923, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "マウスオーバーした単語を自動で読み取る汎用辞書ツール", "url": "https://github.com/kengo700/mouse_over_dictionary", "project_name": "mouse_over_dictionary", "stargazers_count": 70, "source": "GitHub", "score": -0.11948743054101923, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Exploring Japanese SimCSE", "url": "https://github.com/hpprc/simple-simcse-ja", "project_name": "simple-simcse-ja", "stargazers_count": 69, "source": "GitHub", "score": -0.12231962226263658, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "デジタル化資料OCRテキスト化事業において作成されたOCR学習用データセット", "url": "https://github.com/ndl-lab/pdmocrdataset-part1", "project_name": "pdmocrdataset-part1", "stargazers_count": 69, "source": "GitHub", "score": -0.12231962226263658, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Visual Data in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "The Business Scene Dialogue corpus", "url": "https://github.com/tsuruoka-lab/BSD", "project_name": "BSD", "stargazers_count": 68, "source": "GitHub", "score": -0.12515181398425393, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents", "Annotation and Dataset Development" ] }, { "description": "Neural IME: Neural Input Method Engine", "url": "https://github.com/yohokuno/neural_ime", "project_name": "neural_ime", "stargazers_count": 66, "source": "GitHub", "score": -0.13081619742748862, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Java library and command-line tool to transliterate Japanese kanji 
to romaji (Latin alphabet)", "url": "https://github.com/nicolas-raoul/jakaroma", "project_name": "jakaroma", "stargazers_count": 65, "source": "GitHub", "score": -0.13364838914910596, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "Standalone. Small. Language-neutral. BudouX is the successor to Budou, the machine learning powered line break organizer tool.", "url": "https://github.com/google/budoux", "project_name": "budoux", "stargazers_count": 64, "source": "GitHub", "score": -0.13648058087072332, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Chunking" ] }, { "description": "This repository has implementations of data augmentation for NLP for Japanese.", "url": "https://github.com/kajyuuen/daaja", "project_name": "daaja", "stargazers_count": 64, "source": "GitHub", "score": -0.13648058087072332, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Information Retrieval", "Low-Resource NLP" ] }, { "description": "Samples codes for natural language processing in Japanese", "url": "https://github.com/upura/nlp-recipes-ja", "project_name": "nlp-recipes-ja", "stargazers_count": 64, "source": "GitHub", "score": -0.13648058087072332, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "Safe Rust bindings for mecab a part-of-speech and morphological analyzer library", "url": "https://github.com/tsurai/mecab-rs", "project_name": "mecab-rs", "stargazers_count": 63, "source": "GitHub", "score": -0.13931277259234065, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "spaCy tutorial in English and Japanese. 
spacy-transformers, BERT, GiNZA.", "url": "https://github.com/yuibi/spacy_tutorial", "project_name": "spacy_tutorial", "stargazers_count": 63, "source": "GitHub", "score": -0.13931277259234065, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Language Models", "Semantic Text Processing" ] }, { "description": "GPTがYouTuberをやります", "url": "https://github.com/karakuri-ai/gptuber-by-langchain", "project_name": "gptuber-by-langchain", "stargazers_count": 62, "source": "GitHub", "score": -0.142144964313958, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Dialogue Systems & Conversational Agents", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "ぷるーふおぶこんせぷと で公開した機械翻訳エンジンを利用する翻訳環境です。 フォームに入力された文字列の翻訳、PDFの翻訳が可能です。", "url": "https://github.com/s-taka/fugumt", "project_name": "fugumt", "stargazers_count": 62, "source": "GitHub", "score": -0.142144964313958, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "GPTがYouTuberをやります", "url": "https://github.com/karakuri-ai/gptuber-by-langchain", "project_name": "gptuber-by-langchain", "stargazers_count": 62, "source": "GitHub", "score": -0.142144964313958, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Dialogue Systems & Conversational Agents", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Kuromoji morphological analyzer for kuroshiro.", "url": "https://github.com/hexenq/kuroshiro-analyzer-kuromoji", "project_name": "kuroshiro-analyzer-kuromoji", "stargazers_count": 61, "source": "GitHub", "score": -0.14497715603557534, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "Optical character recognition for Japanese text", "url": "https://github.com/aurorawright/owocr", "project_name": "owocr", "stargazers_count": 60, "source": "GitHub", "score": -0.1478093477571927, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "This repository contains the code for supervised fine-tuning of LLM-jp models.", "url": "https://github.com/llm-jp/llm-jp-sft", "project_name": "llm-jp-sft", "stargazers_count": 60, "source": "GitHub", "score": -0.1478093477571927, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Language Models", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "OpenAIのChatGPT APIをSlack上で利用するためのSlackbotスクリプト (日本語での利用が前提)", "url": "https://github.com/sifue/chatgpt-slackbot", "project_name": "chatgpt-slackbot", "stargazers_count": 60, "source": "GitHub", "score": -0.1478093477571927, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Convert external words into Mozc system dictionary", "url": "https://github.com/reasonset/mozcdict-ext", "project_name": "mozcdict-ext", "stargazers_count": 60, "source": "GitHub", "score": -0.1478093477571927, "first_commit": null, 
"latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese IOB2 tagged corpus for Named Entity Recognition.", "url": "https://github.com/Hironsan/IOB2Corpus", "project_name": "IOB2Corpus", "stargazers_count": 60, "source": "GitHub", "score": -0.1478093477571927, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Named Entity Recognition", "Tagging", "Annotation and Dataset Development" ] }, { "description": "LLaVA-JP is a Japanese VLM trained by LLaVA method", "url": "https://github.com/tosiyuki/LLaVA-JP", "project_name": "LLaVA-JP", "stargazers_count": 59, "source": "GitHub", "score": -0.15064153947881004, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Converts Japanese Numerals into number", "url": "https://github.com/twada/japanese-numerals-to-number", "project_name": "japanese-numerals-to-number", "stargazers_count": 59, "source": "GitHub", "score": -0.15064153947881004, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "Monorepo for Kanji, Furigana, Japanese DB, and others", "url": "https://github.com/echamudi/japanese-toolkit", "project_name": "japanese-toolkit", "stargazers_count": 57, "source": "GitHub", "score": -0.15630592292204473, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "モーラバランス型日本語コーパス", "url": "https://github.com/mmorise/rohan4600", "project_name": "rohan4600", "stargazers_count": 57, "source": "GitHub", "score": -0.15630592292204473, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Google 日本語入力用DvorakJPローマ字テーブル / DvorakJP Roman Table for Google Japanese Input", "url": "https://github.com/shinespark/dvorakjp-romantable", "project_name": "dvorakjp-romantable", "stargazers_count": 54, "source": "GitHub", "score": -0.16480249808689676, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Kanji transliteration to hiragana/katakana/romaji, in Java", "url": "https://github.com/nicolas-raoul/kakasi-java", "project_name": "kakasi-java", "stargazers_count": 54, "source": "GitHub", "score": -0.16480249808689676, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "Extractive summarizer using BertSum as summarization model", "url": "https://github.com/neilctwu/YouyakuMan", "project_name": "YouyakuMan", "stargazers_count": 53, "source": "GitHub", "score": -0.16763468980851412, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Summarization", "Text Generation", "Language Models", "Semantic Text 
Processing" ] }, { "description": "Japanese synonym library", "url": "https://github.com/WorksApplications/chikkarpy", "project_name": "chikkarpy", "stargazers_count": 53, "source": "GitHub", "score": -0.16763468980851412, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Morphology" ] }, { "description": "Japanese LLaMa experiment", "url": "https://github.com/lighttransport/japanese-llama-experiment", "project_name": "japanese-llama-experiment", "stargazers_count": 53, "source": "GitHub", "score": -0.16763468980851412, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Generate SKK/MeCab dictionary from Wikipedia(Japanese edition)", "url": "https://github.com/tokuhirom/jawiki-kana-kanji-dict", "project_name": "jawiki-kana-kanji-dict", "stargazers_count": 53, "source": "GitHub", "score": -0.16763468980851412, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Morphology", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "GoogleIME用カタカナ語辞書プロジェクトのアーカイブです。Project archive of Google IME user dictionary from Katakana word ( Japanese loanword ) to English.", "url": "https://github.com/KEINOS/google-ime-user-dictionary-ja-en", "project_name": "google-ime-user-dictionary-ja-en", "stargazers_count": 52, "source": "GitHub", "score": -0.17046688153013145, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Tokenizer POS-Tagger and Dependency-parser with BERT/RoBERTa/DeBERTa models for Japanese and other languages", "url": "https://github.com/KoichiYasuoka/esupar", "project_name": "esupar", "stargazers_count": 50, "source": "GitHub", "score": -0.17613126497336615, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "An open collection of annotated voices in Japanese language", "url": "https://github.com/koniwa/koniwa", "project_name": "koniwa", "stargazers_count": 50, "source": "GitHub", "score": -0.17613126497336615, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "This repository supports YuzuAI's Rakuda leaderboard of Japanese LLMs, which is a Japanese-focused analogue of LMSYS' Vicuna eval.", "url": "https://github.com/yuzu-ai/japanese-llm-ranking", "project_name": "japanese-llm-ranking", "stargazers_count": 49, "source": "GitHub", "score": -0.1789634566949835, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "tokenizer specified for Japanese", "url": "https://github.com/SamuraiT/tinysegmenter", "project_name": "tinysegmenter", "stargazers_count": 49, "source": "GitHub", "score": -0.1789634566949835, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ 
"Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "fasttextとword2vecの比較と、実行スクリプト、学習スクリプトです", "url": "https://github.com/GINK03/fasttext-vs-word2vec-on-twitter-data", "project_name": "fasttext-vs-word2vec-on-twitter-data", "stargazers_count": 49, "source": "GitHub", "score": -0.1789634566949835, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "jp-localization", "url": "https://github.com/yantaisa11/Retrieval-based-Voice-Conversion-WebUI-JP-localization", "project_name": "Retrieval-based-Voice-Conversion-WebUI-JP-localization", "stargazers_count": 49, "source": "GitHub", "score": -0.1789634566949835, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "Python Implementation of EmbedRank", "url": "https://github.com/yagays/embedrank", "project_name": "embedrank", "stargazers_count": 49, "source": "GitHub", "score": -0.1789634566949835, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Semantic Text Processing" ] }, { "description": "BERT and ELECTRA models of PyTorch implementations for Japanese text.", "url": "https://github.com/retarfi/language-pretraining", "project_name": "language-pretraining", "stargazers_count": 49, "source": "GitHub", "score": -0.1789634566949835, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese Daily Dialogue, or 日本語日常対話コーパス in Japanese, is a high-quality multi-turn dialogue dataset containing daily conversations on five topics: dailylife, school, travel, health, and entertainment.", "url": "https://github.com/jqk09a/japanese-daily-dialogue", "project_name": "japanese-daily-dialogue", "stargazers_count": 49, "source": "GitHub", "score": -0.1789634566949835, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "音声認識、文章生成、音声合成を使って対話するチャットボットアプリ", "url": "https://github.com/akarigroup/akari_chatgpt_bot", "project_name": "akari_chatgpt_bot", "stargazers_count": 48, "source": "GitHub", "score": -0.18179564841660084, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Javascript libraries to process text: Arabic, Japanese, etc.", "url": "https://github.com/kariminf/jslingua", "project_name": "jslingua", "stargazers_count": 48, "source": "GitHub", "score": -0.18179564841660084, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "全国の住所データAPI", "url": "https://github.com/geolonia/japanese-addresses-v2", "project_name": "japanese-addresses-v2", "stargazers_count": 46, "source": "GitHub", "score": -0.18746003185983554, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "このサンプルでは、Retrieval Augmented Generation 
"https://github.com/nohanaga/azure-search-openai-demo", "project_name": "azure-search-openai-demo", "stargazers_count": 46, "source": "GitHub", "score": -0.18746003185983554, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Retrieval", "Text Generation" ] }, { "description": "Evaluating GPT-4 and ChatGPT on Japanese Medical Licensing Examinations", "url": "https://github.com/jungokasai/IgakuQA", "project_name": "IgakuQA", "stargazers_count": 46, "source": "GitHub", "score": -0.18746003185983554, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents", "Language Models", "Semantic Text Processing" ] }, { "description": "Converts Arabic numerals, or 'western' style numbers, to a Japanese context.", "url": "https://github.com/Greatdane/Convert-Numbers-to-Japanese", "project_name": "Convert-Numbers-to-Japanese", "stargazers_count": 45, "source": "GitHub", "score": -0.1902922235814529, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "Mozc for Python: Kana-Kanji converter", "url": "https://github.com/ikegami-yukino/mozcpy", "project_name": "mozcpy", "stargazers_count": 45, "source": "GitHub", "score": -0.1902922235814529, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "xvector model on jtubespeech", "url": "https://github.com/sarulab-speech/xvector_jtubespeech", "project_name": "xvector_jtubespeech", "stargazers_count": 44, "source": "GitHub", "score": -0.19312441530307023, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "DistilBERT model pre-trained on 131 GB of Japanese web text.
The teacher model is a BERT-base model built in-house at LINE.", "url": "https://github.com/line/LINE-DistilBERT-Japanese", "project_name": "LINE-DistilBERT-Japanese", "stargazers_count": 44, "source": "GitHub", "score": -0.19312441530307023, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", "Language Models", "Green & Sustainable NLP", "Semantic Text Processing" ] }, { "description": "Japanese Sentences Involving Compositional Knowledge (JSICK) Dataset/JSICK-stress Test Set", "url": "https://github.com/verypluming/JSICK", "project_name": "JSICK", "stargazers_count": 44, "source": "GitHub", "score": -0.19312441530307023, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Textual Inference", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "A small version of UniDic for easy pip installs.", "url": "https://github.com/polm/unidic-lite", "project_name": "unidic-lite", "stargazers_count": 43, "source": "GitHub", "score": -0.1959566070246876, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Morphology", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Gradio WebUI for training and synthesizing speech with Japanese TTS (VITS)", "url": "https://github.com/litagin02/vits-japros-webui", "project_name": "vits-japros-webui", "stargazers_count": 42, "source": "GitHub", "score": -0.19878879874630492, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "A web-app displaying the 2200 kanji characters taught in James Heisig's \"Remembering the Kanji\", 6th edition.", "url": "https://github.com/minosvasilias/kanjigrid", "project_name": "kanjigrid", "stargazers_count": 42, "source": "GitHub", "score": -0.19878879874630492, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Retrieval" ] }, { "description": "Kaomoji dictionary for Google Japanese Input ∩(,,Ò‿Ó,,)∩", "url": "https://github.com/tiwanari/emoticon", "project_name": "emoticon", "stargazers_count": 42, "source": "GitHub", "score": -0.19878879874630492, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "This repository contains scripts to reproduce the LLM-jp corpus.", "url": "https://github.com/llm-jp/llm-jp-corpus", "project_name": "llm-jp-corpus", "stargazers_count": 42, "source": "GitHub", "score": -0.19878879874630492, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Viterbi-based accelerated tokenizer (Python wrapper)", "url": "https://github.com/daac-tools/python-vibrato", "project_name": "python-vibrato", "stargazers_count": 41, "source": "GitHub", "score": -0.20162099046792226, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "Japanese-BPEEncoder", "url": "https://github.com/tanreinama/Japanese-BPEEncoder", "project_name": "Japanese-BPEEncoder", "stargazers_count": 41, "source": "GitHub", "score": -0.20162099046792226, "first_commit": null, "latest_commit": null,
"languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese-BPEEncoder Version 2", "url": "https://github.com/tanreinama/Japanese-BPEEncoder_V2", "project_name": "Japanese-BPEEncoder_V2", "stargazers_count": 41, "source": "GitHub", "score": -0.20162099046792226, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Normalize and fix common issues with Romaji-based Japanese names.", "url": "https://github.com/jeresig/node-romaji-name", "project_name": "node-romaji-name", "stargazers_count": 41, "source": "GitHub", "score": -0.20162099046792226, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "Codes to pre-train Japanese T5 models", "url": "https://github.com/megagonlabs/t5-japanese", "project_name": "t5-japanese", "stargazers_count": 41, "source": "GitHub", "score": -0.20162099046792226, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Character Based Named Entity Recognition.", "url": "https://github.com/chakki-works/namaco", "project_name": "namaco", "stargazers_count": 40, "source": "GitHub", "score": -0.20445318218953962, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Named Entity Recognition", "Tagging" ] }, { "description": "Get Japanese dialogue corpus", "url": "https://github.com/knok/make-meidai-dialogue", "project_name": "make-meidai-dialogue", "stargazers_count": 40, "source": "GitHub", "score": -0.20445318218953962, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "MeCabを利用した日本語形態素解析WebAPI", "url": "https://github.com/bungoume/mecab-web-api", "project_name": "mecab-web-api", "stargazers_count": 40, "source": "GitHub", "score": -0.20445318218953962, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese BERT trained on Aozora Bunko and Wikipedia, pre-tokenized by MeCab with UniDic & SudachiPy", "url": "https://github.com/akirakubo/bert-japanese-aozora", "project_name": "bert-japanese-aozora", "stargazers_count": 40, "source": "GitHub", "score": -0.20445318218953962, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "DocumentClassificationUsingBERT-Japanese", "url": "https://github.com/nekoumei/DocumentClassificationUsingBERT-Japanese", "project_name": "DocumentClassificationUsingBERT-Japanese", "stargazers_count": 40, "source": "GitHub", "score": -0.20445318218953962, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Information Retrieval", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": 
"このリポジトリは日本語LLMのキャラクターロールプレイに関する性能を評価するために作成しました。", "url": "https://github.com/oshizo/japanese-llm-roleplay-benchmark", "project_name": "japanese-llm-roleplay-benchmark", "stargazers_count": 39, "source": "GitHub", "score": -0.20728537391115695, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "LLM勉強会(LLM-jp)で開発しているLLM用のトークナイザー関連をまとめたリポジトリです.", "url": "https://github.com/llm-jp/llm-jp-tokenizer", "project_name": "llm-jp-tokenizer", "stargazers_count": 39, "source": "GitHub", "score": -0.20728537391115695, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese text preprocessor for Text-to-Speech applications (OpenJTalk rewrite in rust language)", "url": "https://github.com/jpreprocess/jpreprocess", "project_name": "jpreprocess", "stargazers_count": 39, "source": "GitHub", "score": -0.20728537391115695, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Speech & Audio in NLP", "Multimodality" ] }, { "description": "Handwritten Japanese OCR demo using touch panel to draw the input text using Intel OpenVINO toolkit", "url": "https://github.com/yas-sim/handwritten-japanese-ocr", "project_name": "handwritten-japanese-ocr", "stargazers_count": 38, "source": "GitHub", "score": -0.2101175656327743, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "Async Japanese Tokenizer Native Plugin for React Native for iOS and Android", "url": "https://github.com/craftzdog/react-native-japanese-tokenizer", "project_name": "react-native-japanese-tokenizer", "stargazers_count": 38, "source": "GitHub", "score": -0.2101175656327743, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "dictionary to find emotion related to text", "url": "https://github.com/sociocom/JIWC-Dictionary", "project_name": "JIWC-Dictionary", "stargazers_count": 38, "source": "GitHub", "score": -0.2101175656327743, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Emotion Analysis", "Sentiment Analysis", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese input method for fcitx5, powered by azooKey engine", "url": "https://github.com/7ka-hiira/fcitx5-hazkey", "project_name": "fcitx5-hazkey", "stargazers_count": 38, "source": "GitHub", "score": -0.2101175656327743, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "This is a English-Japanese lexicon for Machine Learning and Deep Learning terminology.", "url": "https://github.com/Machine-Learning-Tokyo/EN-JP-ML-Lexicon", "project_name": "EN-JP-ML-Lexicon", "stargazers_count": 37, "source": "GitHub", "score": -0.21294975735439164, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Web frontend for the JMdict 
Japanese-English dictionary project, with study list support!", "url": "https://github.com/hlorenzi/jisho-open", "project_name": "jisho-open", "stargazers_count": 37, "source": "GitHub", "score": -0.21294975735439164, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Tokenizer POS-tagger Lemmatizer and Dependency-parser for modern and contemporary Japanese", "url": "https://github.com/KoichiYasuoka/UniDic2UD", "project_name": "UniDic2UD", "stargazers_count": 36, "source": "GitHub", "score": -0.215781949076009, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Syntactic Parsing", "Tagging", "Low-Resource NLP" ] }, { "description": "wikiHow dataset (Japanese version)", "url": "https://github.com/Katsumata420/wikihow_japanese", "project_name": "wikihow_japanese", "stargazers_count": 36, "source": "GitHub", "score": -0.215781949076009, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Morphology" ] }, { "description": "Mecab + NEologd + Docker + Python3", "url": "https://github.com/p-geon/ja-tokenizer-docker-py", "project_name": "ja-tokenizer-docker-py", "stargazers_count": 35, "source": "GitHub", "score": -0.21861414079762634, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "Using pre-trained Japanese BERT models with PyTorch", "url": "https://github.com/yagays/pytorch_bert_japanese", "project_name": "pytorch_bert_japanese", "stargazers_count": 35, "source": "GitHub", "score": -0.21861414079762634, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Visual Data in NLP", "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "Japanese entity matching (nayose) dataset created from Wikipedia", "url": "https://github.com/yagays/nayose-wikipedia-ja", "project_name": "nayose-wikipedia-ja", "stargazers_count": 35, "source": "GitHub", "score": -0.21861414079762634, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Information Extraction & Text Mining", "Coreference Resolution", "Named Entity Recognition", "Annotation and Dataset Development" ] }, { "description": "Python wrapper for KyTea", "url": "https://github.com/chezou/Mykytea-python", "project_name": "Mykytea-python", "stargazers_count": 34, "source": "GitHub", "score": -0.2214463325192437, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Robustness in NLP", "Tagging", "Morphology" ] }, { "description": "This repository publishes, as a solution, the flows and other assets created for the trial use of ChatGPT at Kobe City Hall.", "url": "https://github.com/City-of-Kobe/pva-aoai-integration-solution", "project_name": "pva-aoai-integration-solution", "stargazers_count": 34, "source": "GitHub", "score": -0.2214463325192437, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Another Anthy", "url": "https://github.com/fujiwarat/anthy-unicode", "project_name": "anthy-unicode",
"stargazers_count": 34, "source": "GitHub", "score": -0.2214463325192437, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [] }, { "description": "法律・判例関係のデータセット", "url": "https://github.com/japanese-law-analysis/data_set", "project_name": "data_set", "stargazers_count": 34, "source": "GitHub", "score": -0.2214463325192437, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Japanese Vicuna QA Benchmark", "url": "https://github.com/ku-nlp/ja-vicuna-qa-benchmark", "project_name": "ja-vicuna-qa-benchmark", "stargazers_count": 33, "source": "GitHub", "score": -0.22427852424086103, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Natural Language Interfaces", "Question Answering" ] }, { "description": "ベイズ階層言語モデルによる教師なし形態素解析", "url": "https://github.com/musyoku/python-npylm", "project_name": "python-npylm", "stargazers_count": 33, "source": "GitHub", "score": -0.22427852424086103, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "MARINE : Multi-task leaRnIng-based JapaNese accent Estimation", "url": "https://github.com/6gsn/marine", "project_name": "marine", "stargazers_count": 33, "source": "GitHub", "score": -0.22427852424086103, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Phonetics", "Speech & Audio in NLP", "Multimodality", "Low-Resource NLP" ] }, { "description": "A Java library to converts between Japanese Hiragana, Katakana, and Romaji scripts.", "url": "https://github.com/andree-surya/moji4j", "project_name": "moji4j", "stargazers_count": 33, "source": "GitHub", "score": -0.22427852424086103, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "BERT with SentencePiece for Japanese text.", "url": "https://github.com/alinear-corp/albert-japanese", "project_name": "albert-japanese", "stargazers_count": 33, "source": "GitHub", "score": -0.22427852424086103, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "日本語マルチタスク言語理解ベンチマーク Japanese Massive Multitask Language Understanding Benchmark", "url": "https://github.com/nlp-waseda/JMMLU", "project_name": "JMMLU", "stargazers_count": 33, "source": "GitHub", "score": -0.22427852424086103, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Responsible & Trustworthy NLP", "Low-Resource NLP" ] }, { "description": "青空文庫振り仮名注釈付き音声コーパスのデータセット", "url": "https://github.com/ndl-lab/hurigana-speech-corpus-aozora", "project_name": "hurigana-speech-corpus-aozora", "stargazers_count": 33, "source": "GitHub", "score": -0.22427852424086103, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Yet another Python binding for Juman++/KNP", "url": "https://github.com/ku-nlp/rhoknp", "project_name": "rhoknp", "stargazers_count": 32, "source": 
"GitHub", "score": -0.2271107159624784, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "A tool to get the katakana reading of an alphabetical string.", "url": "https://github.com/zomysan/alkana.py", "project_name": "alkana.py", "stargazers_count": 32, "source": "GitHub", "score": -0.2271107159624784, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization", "Tagging", "Morphology" ] }, { "description": "Finetuning Whisper ASR model", "url": "https://github.com/sarulab-speech/whisper-asr-finetune", "project_name": "whisper-asr-finetune", "stargazers_count": 32, "source": "GitHub", "score": -0.2271107159624784, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Speech Recognition", "Text Generation", "Language Models", "Speech & Audio in NLP", "Semantic Text Processing", "Multimodality" ] }, { "description": "gpt-2 based text2text conversion model", "url": "https://github.com/tanreinama/text2text-japanese", "project_name": "text2text-japanese", "stargazers_count": 32, "source": "GitHub", "score": -0.2271107159624784, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Dialogue Response Generation", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese input method for Sway using libanthy", "url": "https://github.com/tadeokondrak/anthywl", "project_name": "anthywl", "stargazers_count": 32, "source": "GitHub", "score": -0.2271107159624784, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "A paraphrase database for Japanese text simplification", "url": "https://github.com/tmu-nlp/simple-jppdb", "project_name": "simple-jppdb", "stargazers_count": 32, "source": "GitHub", "score": -0.2271107159624784, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Paraphrasing", "Text Generation", "Annotation and Dataset Development" ] }, { "description": "Pixiv Encyclopedia Dictionary for Yomitan", "url": "https://github.com/MarvNC/pixiv-yomitan", "project_name": "pixiv-yomitan", "stargazers_count": 31, "source": "GitHub", "score": -0.22994290768409573, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "JVS (Japanese versatile speech) コーパスの自作のラベル", "url": "https://github.com/Hiroshiba/jvs_hiho", "project_name": "jvs_hiho", "stargazers_count": 31, "source": "GitHub", "score": -0.22994290768409573, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Japanese rōmaji input schema for Rime IME", "url": "https://github.com/lazyfoxchan/rime-jaroomaji", "project_name": "rime-jaroomaji", "stargazers_count": 30, "source": "GitHub", "score": -0.2327750994057131, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input 
Method" ] }, { "description": "A Japanese Parser", "url": "https://github.com/ku-nlp/knp", "project_name": "knp", "stargazers_count": 30, "source": "GitHub", "score": -0.2327750994057131, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing" ] }, { "description": "Rust library of natural language dictionaries using character-wise double-array tries.", "url": "https://github.com/daac-tools/crawdad", "project_name": "crawdad", "stargazers_count": 30, "source": "GitHub", "score": -0.2327750994057131, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Retrieval", "Indexing" ] }, { "description": "Japanese pitch accent utils", "url": "https://github.com/DJTB/hatsuon", "project_name": "hatsuon", "stargazers_count": 30, "source": "GitHub", "score": -0.2327750994057131, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Syntactic Text Processing" ] }, { "description": "Microsoft IMEなどで利用することを想定した「にじさんじ」関連用語の用語辞書です。", "url": "https://github.com/Umichang/nijisanji-ime-dic", "project_name": "nijisanji-ime-dic", "stargazers_count": 30, "source": "GitHub", "score": -0.2327750994057131, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "3行要約データセット", "url": "https://github.com/KodairaTomonori/ThreeLineSummaryDataset", "project_name": "ThreeLineSummaryDataset", "stargazers_count": 30, "source": "GitHub", "score": -0.2327750994057131, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Janome を使ったテキストマイニング入門チュートリアルです。", "url": "https://github.com/mocobeta/janome-tutorial", "project_name": "janome-tutorial", "stargazers_count": 30, "source": "GitHub", "score": -0.2327750994057131, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Heteronym disambiguation library using a fine-tuned BERT model.", "url": "https://github.com/passaglia/yomikata", "project_name": "yomikata", "stargazers_count": 29, "source": "GitHub", "score": -0.23560729112733042, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese analyzer uses kuromoji japanese tokenizer for ElasticSearch", "url": "https://github.com/suguru/elasticsearch-analysis-japanese", "project_name": "elasticsearch-analysis-japanese", "stargazers_count": 29, "source": "GitHub", "score": -0.23560729112733042, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Text Segmentation", "Information Retrieval", "Tagging", "Indexing", "Morphology" ] }, { "description": "COMET-ATOMIC ja", "url": "https://github.com/nlp-waseda/comet-atomic-ja", "project_name": "comet-atomic-ja", "stargazers_count": 29, "source": "GitHub", "score": -0.23560729112733042, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": 
"Google Colaboratoryで日本語のBERTを動かす方法です。", "url": "https://github.com/YutaroOgawa/BERT_Japanese_Google_Colaboratory", "project_name": "BERT_Japanese_Google_Colaboratory", "stargazers_count": 29, "source": "GitHub", "score": -0.23560729112733042, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "【2024年版】BERTによるテキスト分類", "url": "https://github.com/hpprc/bert-classification-tutorial-2024", "project_name": "bert-classification-tutorial-2024", "stargazers_count": 29, "source": "GitHub", "score": -0.23560729112733042, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Dialogue Systems & Conversational Agents", "Language Models", "Semantic Text Processing" ] }, { "description": "日本語文法誤り訂正ツール", "url": "https://github.com/youichiro/transformer-copy", "project_name": "transformer-copy", "stargazers_count": 28, "source": "GitHub", "score": -0.23843948284894778, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "This repository provides snippets to use RoBERTa pre-trained on Japanese corpus. Our dataset consists of Japanese Wikipedia and web-scrolled articles, 25GB in total. The released model is built based on that from HuggingFace.", "url": "https://github.com/informatix-inc/bert", "project_name": "bert", "stargazers_count": 28, "source": "GitHub", "score": -0.23843948284894778, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Common LispによるSKK辞書サーバーとその拡張", "url": "https://github.com/tani/cl-skkserv", "project_name": "cl-skkserv", "stargazers_count": 28, "source": "GitHub", "score": -0.23843948284894778, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese data from the Google UDT 2.0.", "url": "https://github.com/megagonlabs/UD_Japanese-GSD", "project_name": "UD_Japanese-GSD", "stargazers_count": 28, "source": "GitHub", "score": -0.23843948284894778, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Syntactic Text Processing" ] }, { "description": "VRChatにAI Botを作るためのリポジトリ", "url": "https://github.com/Geson-anko/vrchatbot", "project_name": "vrchatbot", "stargazers_count": 27, "source": "GitHub", "score": -0.24127167457056511, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "Japanese Movie Recommendation Dialogue dataset", "url": "https://github.com/ku-nlp/JMRD", "project_name": "JMRD", "stargazers_count": 27, "source": "GitHub", "score": -0.24127167457056511, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Dialogue Systems & Conversational Agents" ] }, { "description": "Small example scripts for working with Japanese texts in Python", "url": "https://github.com/olsgaard/Japanese_nlp_scripts", "project_name": "Japanese_nlp_scripts", "stargazers_count": 26, "source": "GitHub", "score": -0.24410386629218245, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ 
"Syntactic Text Processing" ] }, { "description": "New kana-kanji conversion engine", "url": "https://github.com/yoriyuki/nksnd", "project_name": "nksnd", "stargazers_count": 26, "source": "GitHub", "score": -0.24410386629218245, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "A Japanese Morphological Analyzer written in pure Rust", "url": "https://github.com/agatan/yoin", "project_name": "yoin", "stargazers_count": 26, "source": "GitHub", "score": -0.24410386629218245, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "THE IDOLM@STER words dictionary for Japanese IME (by imas-db.jp)", "url": "https://github.com/maruamyu/imas-ime-dic", "project_name": "imas-ime-dic", "stargazers_count": 26, "source": "GitHub", "score": -0.24410386629218245, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "贵樣ばこゐ辞畫を使て正レい日本语を使ラことが出來ゑ。", "url": "https://github.com/Rinrin0413/Ayashiy-Nipongo-Dic", "project_name": "Ayashiy-Nipongo-Dic", "stargazers_count": 26, "source": "GitHub", "score": -0.24410386629218245, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "全国書誌データから作成した振り仮名のデータセット", "url": "https://github.com/ndl-lab/huriganacorpus-ndlbib", "project_name": "huriganacorpus-ndlbib", "stargazers_count": 26, "source": "GitHub", "score": -0.24410386629218245, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "サイバーセキュリティに関連する公的な組織の日英対応", "url": "https://github.com/SaitoLab/security_words", "project_name": "security_words", "stargazers_count": 26, "source": "GitHub", "score": -0.24410386629218245, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "日本語音声に対して音素ラベルをアラインメントするためのツールです", "url": "https://github.com/dwangomediavillage/pydomino", "project_name": "pydomino", "stargazers_count": 25, "source": "GitHub", "score": -0.2469360580137998, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Convert romaji into hiragana", "url": "https://github.com/koozaki/romaji-conv", "project_name": "romaji-conv", "stargazers_count": 25, "source": "GitHub", "score": -0.2469360580137998, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "東日本大震災発生から24時間以内につぶやかれたジオタグ付きツイートのデジタルアーカイブです。", "url": "https://github.com/wtnv-lab/tweetMapping", "project_name": "tweetMapping", "stargazers_count": 25, "source": "GitHub", "score": -0.2469360580137998, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": 
"ChatGPT3.5を搭載した伺かゴースト「AI妹アイちゃん」です。利用には別途ChatGPTのAPIキーが必要です。", "url": "https://github.com/manju-summoner/AISisterAIChan", "project_name": "AISisterAIChan", "stargazers_count": 25, "source": "GitHub", "score": -0.2469360580137998, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Dialogue Systems & Conversational Agents", "Language Models", "Semantic Text Processing" ] }, { "description": "IPAdic packaged for easy use from Python.", "url": "https://github.com/polm/ipadic-py", "project_name": "ipadic-py", "stargazers_count": 25, "source": "GitHub", "score": -0.2469360580137998, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Accommodation Search Dialog Corpus (宿泊施設探索対話コーパス)", "url": "https://github.com/megagonlabs/asdc", "project_name": "asdc", "stargazers_count": 25, "source": "GitHub", "score": -0.2469360580137998, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Natural Language Interfaces", "Information Retrieval", "Dialogue Systems & Conversational Agents" ] }, { "description": "CAMERA (CyberAgent Multimodal Evaluation for Ad Text GeneRAtion) is the Japanese ad text generation dataset.", "url": "https://github.com/CyberAgentAILab/camera", "project_name": "camera", "stargazers_count": 25, "source": "GitHub", "score": -0.2469360580137998, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Visual Data in NLP", "Text Generation", "Multimodality" ] }, { "description": "Mozc UT Jawiki Dictionary is a dictionary generated from the Japanese Wikipedia for Mozc.", "url": "https://github.com/utuhiro78/mozcdic-ut-jawiki", "project_name": "mozcdic-ut-jawiki", "stargazers_count": 24, "source": "GitHub", "score": -0.24976824973541714, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "python版日本語意味役割付与システム(ASA)", "url": "https://github.com/Takeuchi-Lab-LM/python_asa", "project_name": "python_asa", "stargazers_count": 23, "source": "GitHub", "score": -0.2526004414570345, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "日本語とグロンギ語の相互変換スクリプト", "url": "https://github.com/shogo82148/Grongish", "project_name": "Grongish", "stargazers_count": 23, "source": "GitHub", "score": -0.2526004414570345, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ホロライブ(ホロライブプロダクション)に関する辞書ファイルです。./dictionary フォルダ内のテキストファイルを使って、IMEに単語を追加できます。詳細はREADME.mdをご覧ください。", "url": "https://github.com/heppokofrontend/hololive-dictionary", "project_name": "hololive-dictionary", "stargazers_count": 23, "source": "GitHub", "score": -0.2526004414570345, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "A Japanese input method.", "url": "https://github.com/mojyack/mikan", "project_name": "mikan", "stargazers_count": 23, "source": "GitHub", "score": -0.2526004414570345, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": 
"dataset", "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Rakuten MA (Python version)", "url": "https://github.com/ikegami-yukino/rakutenma-python", "project_name": "rakutenma-python", "stargazers_count": 22, "source": "GitHub", "score": -0.25543263317865184, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Morphology", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "A Japanese dependency parser based on BERT", "url": "https://github.com/ku-nlp/bertknp", "project_name": "bertknp", "stargazers_count": 22, "source": "GitHub", "score": -0.25543263317865184, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Language Models", "Semantic Text Processing" ] }, { "description": "Yet another sentence-level tokenizer for the Japanese text", "url": "https://github.com/ikegami-yukino/sengiri", "project_name": "sengiri", "stargazers_count": 22, "source": "GitHub", "score": -0.25543263317865184, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "A Python script for adding furigana to Japanese epub books using Mecab and Unidic.", "url": "https://github.com/Mumumu4/furigana4epub", "project_name": "furigana4epub", "stargazers_count": 22, "source": "GitHub", "score": -0.25543263317865184, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Make learning Japanese easier by adding readings for every kanji in the eBook", "url": "https://github.com/rabbit19981023/yomigana-ebook", "project_name": "yomigana-ebook", "stargazers_count": 22, "source": "GitHub", "score": -0.25543263317865184, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "This tool automatically evaluates Japanese multi-modal large language models across multiple datasets.", "url": "https://github.com/llm-jp/llm-jp-eval-mm", "project_name": "llm-jp-eval-mm", "stargazers_count": 22, "source": "GitHub", "score": -0.25543263317865184, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Language Models", "Semantic Text Processing", "Multimodality" ] }, { "description": "ILYS-aoba-chatbot", "url": "https://github.com/cl-tohoku/ILYS-aoba-chatbot", "project_name": "ILYS-aoba-chatbot", "stargazers_count": 22, "source": "GitHub", "score": -0.25543263317865184, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents" ] }, { "description": "Japanese BERT Pretrained Model", "url": "https://github.com/tanreinama/RoBERTa-japanese", "project_name": "RoBERTa-japanese", "stargazers_count": 22, "source": "GitHub", "score": -0.25543263317865184, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "かな漢字変換エンジン 
SKKのための新しい辞書形式", "url": "https://github.com/skk-dict/jisyo", "project_name": "jisyo", "stargazers_count": 22, "source": "GitHub", "score": -0.25543263317865184, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [] }, { "description": "Japanese instruction data (日本語指示データ)", "url": "https://github.com/megagonlabs/instruction_ja", "project_name": "instruction_ja", "stargazers_count": 22, "source": "GitHub", "score": -0.25543263317865184, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "2023年8月にメルボルン大学から公開された安全性評価データセット『Do-Not-Answer』を日本語LLMの評価においても使用できるように日本語に自動翻訳し、さらに日本文化も考慮して修正したデータセット。", "url": "https://github.com/kunishou/do-not-answer-ja", "project_name": "do-not-answer-ja", "stargazers_count": 22, "source": "GitHub", "score": -0.25543263317865184, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "A fast character conversion and transliteration library based on the scheme defined for Japan National Tax Agency (国税庁) 's", "url": "https://github.com/opencollector/jntajis-python", "project_name": "jntajis-python", "stargazers_count": 21, "source": "GitHub", "score": -0.2582648249002692, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "Rapid Automatic Keyword Extraction algorithm for Japanese", "url": "https://github.com/kanjirz50/rake-ja", "project_name": "rake-ja", "stargazers_count": 21, "source": "GitHub", "score": -0.2582648249002692, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Term Extraction", "Information Extraction & Text Mining" ] }, { "description": "Utility collections for making Japanese text old-fashioned", "url": "https://github.com/hakatashi/kyujitai.js", "project_name": "kyujitai.js", "stargazers_count": 21, "source": "GitHub", "score": -0.2582648249002692, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "首都大日本語 Twitter コーパス", "url": "https://github.com/tmu-nlp/TwitterCorpus", "project_name": "TwitterCorpus", "stargazers_count": 21, "source": "GitHub", "score": -0.2582648249002692, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Information Retrieval", "Sentiment Analysis" ] }, { "description": "敬語変換タスクにおける評価用データセット", "url": "https://github.com/cl-tohoku/keigo_transfer_task", "project_name": "keigo_transfer_task", "stargazers_count": 21, "source": "GitHub", "score": -0.2582648249002692, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "\"Proposal and Evaluation of Japanese Toxicity Schema\" provides a schema and dataset for toxicity in the Japanese language.", "url": "https://github.com/inspection-ai/japanese-toxic-dataset", "project_name": "japanese-toxic-dataset", "stargazers_count": 21, "source": "GitHub", "score": -0.2582648249002692, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Morphology", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "The full-text search system for 
Aozora Bunko by Groonga. 青空文庫全文検索ライブラリ兼Webアプリ。", "url": "https://github.com/myokoym/aozorasearch", "project_name": "aozorasearch", "stargazers_count": 21, "source": "GitHub", "score": -0.2582648249002692, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Information Retrieval" ] }, { "description": "作って学ぶ正規表現エンジン", "url": "https://github.com/makenowjust/kantan-regex-book", "project_name": "kantan-regex-book", "stargazers_count": 21, "source": "GitHub", "score": -0.2582648249002692, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Vaporetto is a fast and lightweight pointwise prediction based tokenizer. This is a Python wrapper for Vaporetto.", "url": "https://github.com/daac-tools/python-vaporetto", "project_name": "python-vaporetto", "stargazers_count": 20, "source": "GitHub", "score": -0.26109701662188656, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "A CCG parser for Japanese with DTS-representations", "url": "https://github.com/daisukebekki/lightblue", "project_name": "lightblue", "stargazers_count": 20, "source": "GitHub", "score": -0.26109701662188656, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Syntactic Parsing", "Semantic Text Processing" ] }, { "description": "pygeonlp, A python module for geotagging Japanese texts.", "url": "https://github.com/geonlp-platform/pygeonlp", "project_name": "pygeonlp", "stargazers_count": 20, "source": "GitHub", "score": -0.26109701662188656, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Tagging", "Morphology" ] }, { "description": "Darts-clone python binding", "url": "https://github.com/rixwew/darts-clone-python", "project_name": "darts-clone-python", "stargazers_count": 20, "source": "GitHub", "score": -0.26109701662188656, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "A parser for Japanese number (Kanji, arabic) in the natural language.", "url": "https://github.com/takumakanari/japanese-numbers-python", "project_name": "japanese-numbers-python", "stargazers_count": 20, "source": "GitHub", "score": -0.26109701662188656, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing" ] }, { "description": "Easily turn large English text datasets into Japanese text datasets using open LLMs.", "url": "https://github.com/llm-jp/text2dataset", "project_name": "text2dataset", "stargazers_count": 20, "source": "GitHub", "score": -0.26109701662188656, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "このライブラリは、ひらがな・カタカナ、半角・全角の相互変換や判別を始めとした機能を提供します。", "url": "https://github.com/samunohito/kanaria", "project_name": "kanaria", "stargazers_count": 20, "source": "GitHub", "score": -0.26109701662188656, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, 
"multi_labels": [ "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "日本語フェイクニュースデータセット", "url": "https://github.com/tanreinama/Japanese-Fakenews-Dataset", "project_name": "Japanese-Fakenews-Dataset", "stargazers_count": 20, "source": "GitHub", "score": -0.26109701662188656, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Easy-to-use scripts to fine-tune GPT-2-JA with your own texts, to generate sentences, and to tweet them automatically.", "url": "https://github.com/discus0434/text-generation", "project_name": "text-generation", "stargazers_count": 19, "source": "GitHub", "score": -0.26392920834350386, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "最小のサーチエンジン/PageRank/tf-idf", "url": "https://github.com/GINK03/minimal-search-engine", "project_name": "minimal-search-engine", "stargazers_count": 19, "source": "GitHub", "score": -0.26392920834350386, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Retrieval", "Indexing" ] }, { "description": "Mixtral-based Ja-En (En-Ja) Translation model", "url": "https://github.com/hpprc/llm-translator", "project_name": "llm-translator", "stargazers_count": 19, "source": "GitHub", "score": -0.26392920834350386, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "生成自動評価を行うためのPythonツール", "url": "https://github.com/llm-jp/llm-jp-judge", "project_name": "llm-jp-judge", "stargazers_count": 19, "source": "GitHub", "score": -0.26392920834350386, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "A Rust library to convert Japanese Half-width-kana[半角カナ] and Wide-alphanumeric[全角英数] into normal ones", "url": "https://github.com/gemmarx/unicode-jp-rs", "project_name": "unicode-jp-rs", "stargazers_count": 19, "source": "GitHub", "score": -0.26392920834350386, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "A Go library for Zenkaku/Hankaku conversion", "url": "https://github.com/ktnyt/go-moji", "project_name": "go-moji", "stargazers_count": 19, "source": "GitHub", "score": -0.26392920834350386, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "日本語で書かれた技術書を収集した生コーパス/ツール", "url": "https://github.com/textlint-ja/technological-book-corpus-ja", "project_name": "technological-book-corpus-ja", "stargazers_count": 19, "source": "GitHub", "score": -0.26392920834350386, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Tokenizer POS-tagger Lemmatizer and Dependency-parser for modern and contemporary Japanese with BERT models", "url": 
"https://github.com/KoichiYasuoka/SuPar-UniDic", "project_name": "SuPar-UniDic", "stargazers_count": 18, "source": "GitHub", "score": -0.2667614000651212, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Syntactic Parsing", "Language Models", "Tagging", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "The repository contains scripts and merge scripts that have been modified to adapt an Alpaca-Lora adapter for LoRA tuning when assuming the use of the \"rinna/japanese-gpt-neox...\" [gpt-neox] model converted to ggml.", "url": "https://github.com/yukaryavka/rinna_gpt-neox_ggml-lora", "project_name": "rinna_gpt-neox_ggml-lora", "stargazers_count": 18, "source": "GitHub", "score": -0.2667614000651212, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "Latest version of MedEX/J (Japanese disease name extractor)", "url": "https://github.com/sociocom/MedNER-J", "project_name": "MedNER-J", "stargazers_count": 18, "source": "GitHub", "score": -0.2667614000651212, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Term Extraction", "Information Extraction & Text Mining" ] }, { "description": "専門用語抽出アルゴリズムの実装の練習", "url": "https://github.com/kanjirz50/termextract", "project_name": "termextract", "stargazers_count": 18, "source": "GitHub", "score": -0.2667614000651212, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "文の敬体(ですます調)、常体(である調)を解析するJavaScriptライブラリ", "url": "https://github.com/textlint-ja/analyze-desumasu-dearu", "project_name": "analyze-desumasu-dearu", "stargazers_count": 18, "source": "GitHub", "score": -0.2667614000651212, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Mozc UT Place Name Dictionary is a dictionary converted from the Japan Post's ZIP code data for Mozc.", "url": "https://github.com/utuhiro78/mozcdic-ut-place-names", "project_name": "mozcdic-ut-place-names", "stargazers_count": 18, "source": "GitHub", "score": -0.2667614000651212, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Evidence-based Explanation Dataset (AACL-IJCNLP 2020)", "url": "https://github.com/megagonlabs/ebe-dataset", "project_name": "ebe-dataset", "stargazers_count": 18, "source": "GitHub", "score": -0.2667614000651212, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Responsible & Trustworthy NLP", "Reasoning", "Fact & Claim Verification", "Explainability & Interpretability in NLP", "Annotation and Dataset Development" ] }, { "description": "TEDxJP-10K ASR Evaluation Dataset", "url": "https://github.com/laboroai/TEDxJP-10K", "project_name": "TEDxJP-10K", "stargazers_count": 18, "source": "GitHub", "score": -0.2667614000651212, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Speech Recognition", "Text Generation", 
"Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "An easy to use tokenizer for Japanese text, aimed at language learners and non-linguists", "url": "https://github.com/mkartawijaya/dango", "project_name": "dango", "stargazers_count": 17, "source": "GitHub", "score": -0.2695935917867386, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Morphology" ] }, { "description": "Neural Image Caption (NIC) on chainer, its pretrained models on English and Japanese image caption datasets.", "url": "https://github.com/yuyay/chainer_nic", "project_name": "chainer_nic", "stargazers_count": 17, "source": "GitHub", "score": -0.2695935917867386, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Visual Data in NLP", "Captioning", "Text Generation", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "A Japanese law parser", "url": "https://github.com/takuyaa/ja-law-parser", "project_name": "ja-law-parser", "stargazers_count": 17, "source": "GitHub", "score": -0.2695935917867386, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Linguistics & Cognitive NLP", "Linguistic Theories" ] }, { "description": "Annotated Fuman Kaitori Center Corpus", "url": "https://github.com/ku-nlp/AnnotatedFKCCorpus", "project_name": "AnnotatedFKCCorpus", "stargazers_count": 17, "source": "GitHub", "score": -0.2695935917867386, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Annotation and Dataset Development" ] }, { "description": "Japanese Adversarial Natural Language Inference Dataset", "url": "https://github.com/verypluming/JaNLI", "project_name": "JaNLI", "stargazers_count": 17, "source": "GitHub", "score": -0.2695935917867386, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Responsible & Trustworthy NLP", "Reasoning", "Textual Inference", "Robustness in NLP" ] }, { "description": "English loanwords in Japanese", "url": "https://github.com/jamesohortle/loanwords_gairaigo", "project_name": "loanwords_gairaigo", "stargazers_count": 17, "source": "GitHub", "score": -0.2695935917867386, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "NIILC QA data", "url": "https://github.com/mynlp/niilc-qa", "project_name": "niilc-qa", "stargazers_count": 17, "source": "GitHub", "score": -0.2695935917867386, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Natural Language Interfaces", "Question Answering", "Annotation and Dataset Development" ] }, { "description": "JASS: Japanese-specific Sequence to Sequence Pre-training for Neural Machine Translation (LREC2020) & Linguistically Driven Multi-Task Pre-Training for Low-Resource Neural Machine Translation (ACM TALLIP)", "url": "https://github.com/Mao-KU/JASS", "project_name": "JASS", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], 
"model_or_dataset": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models" ] }, { "description": "Use custom tokenizers in spacy-transformers", "url": "https://github.com/megagonlabs/ginza-transformers", "project_name": "ginza-transformers", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "BERT-based GEC tagging for Japanese", "url": "https://github.com/jonnyli1125/gector-ja", "project_name": "gector-ja", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Representation Learning", "Syntactic Text Processing", "Language Models", "Tagging", "Semantic Text Processing" ] }, { "description": "Swallowプロジェクト 大規模言語モデル 評価スクリプト", "url": "https://github.com/swallow-llm/swallow-evaluation", "project_name": "swallow-evaluation", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "https://www.nlp.ecei.tohoku.ac.jp/projects/aio/", "url": "https://github.com/cl-tohoku/AIO2_DPR_baseline", "project_name": "AIO2_DPR_baseline", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "教師なし品詞タグ推定", "url": "https://github.com/musyoku/unsupervised-pos-tagging", "project_name": "unsupervised-pos-tagging", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "ふりがなパッド", "url": "https://github.com/esrille/furiganapad", "project_name": "furiganapad", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Extend GNOME On-Screen Keyboard for Input Methods", "url": "https://github.com/google/shuwa", "project_name": "shuwa", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "Rinna-3.6B、OpenCALM等の日本語対応LLM(大規模言語モデル)用の簡易Webインタフェースです", "url": "https://github.com/noir55/japanese_llm_simple_webui", "project_name": "japanese_llm_simple_webui", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "GPT-4 を用いて、言語モデルの応答を自動評価するスクリプト", "url": "https://github.com/northern-system-service/gpt4-autoeval", "project_name": "gpt4-autoeval", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, 
"multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Jisho is a CLI tool & Rust library that provides a Japanese-English dictionary.", "url": "https://github.com/eagleflo/jisho", "project_name": "jisho", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Towards a Japanese verb conjugator and deconjugator based on Taeko Kamiya's *The Handbook of Japanese Verbs* and *The Handbook of Japanese Adjectives and Adverbs* opuses.", "url": "https://github.com/fasiha/kamiya-codec", "project_name": "kamiya-codec", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "aMLP Transformer Model for Japanese", "url": "https://github.com/tanreinama/aMLP-japanese", "project_name": "aMLP-japanese", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Semantic Text Processing" ] }, { "description": "Laboro DistilBERT Japanese", "url": "https://github.com/laboroai/Laboro-DistilBERT-Japanese", "project_name": "Laboro-DistilBERT-Japanese", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "禁止用語や単語レベルを考慮した日英対訳コーパスです。", "url": "https://github.com/marmooo/graded-enja-corpus", "project_name": "graded-enja-corpus", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Scripts for creating a Japanese-English parallel corpus and training NMT models", "url": "https://github.com/laboroai/Laboro-ParaCorpus", "project_name": "Laboro-ParaCorpus", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "Top 5000 Japanese family names, with readings, ordered by frequency.", "url": "https://github.com/siikamiika/japanese-family-names", "project_name": "japanese-family-names", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Information Extraction & Text Mining" ] }, { "description": "日本語NLPライブラリGiNZAのすゝめ", "url": "https://github.com/poyo46/ginza-examples", "project_name": "ginza-examples", "stargazers_count": 16, "source": "GitHub", "score": -0.27242578350835595, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Examples to finetune encoder-only and encoder-decoder 
transformers for Japanese language (Hugging Face) Resources", "url": "https://github.com/tsmatz/huggingface-finetune-japanese", "project_name": "huggingface-finetune-japanese", "stargazers_count": 15, "source": "GitHub", "score": -0.27525797522997325, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "「ひらがな」または「カタカナ」を日本語で発音する際の音声記号(IPA)に変換するコマンド", "url": "https://github.com/amanoese/kana2ipa", "project_name": "kana2ipa", "stargazers_count": 15, "source": "GitHub", "score": -0.27525797522997325, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "A chatbox application built using Nuxt 3 powered by Open AI Text completion endpoint. You can select different personality of your AI friend. The default will respond in Japanese. You can use this app to practice your Nihongo skills!", "url": "https://github.com/supershaneski/openai-chatfriend", "project_name": "openai-chatfriend", "stargazers_count": 15, "source": "GitHub", "score": -0.27525797522997325, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "OpenAIのAPIを利用して、設定したキャラクターと日本語で会話するチャットスクリプトです。", "url": "https://github.com/mutaguchi/character_chat", "project_name": "character_chat", "stargazers_count": 15, "source": "GitHub", "score": -0.27525797522997325, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "OpenAssistant のオープンソースデータ OASST1 を日本語に翻訳したデータセットになります。", "url": "https://github.com/kunishou/oasst1-89k-ja", "project_name": "oasst1-89k-ja", "stargazers_count": 15, "source": "GitHub", "score": -0.27525797522997325, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese Sentence Breaker", "url": "https://github.com/hppRC/japanese-sentence-breaker", "project_name": "japanese-sentence-breaker", "stargazers_count": 14, "source": "GitHub", "score": -0.2780901669515906, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "A phenomenon-wise evaluation dataset for Japanese-English machine translation robustness. The dataset is based on the MTNT dataset, with additional annotations of four linguistic phenomena; Proper Noun, Abbreviated Noun, Colloquial Expression, and Variant. 
COLING 2020.", "url": "https://github.com/cl-tohoku/PheMT", "project_name": "PheMT", "stargazers_count": 14, "source": "GitHub", "score": -0.2780901669515906, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Responsible & Trustworthy NLP", "Text Generation", "Machine Translation", "Robustness in NLP", "Annotation and Dataset Development" ] }, { "description": "Negima is a Python package to extract phrases in Japanese text by using the part-of-speeches based rules you defined.", "url": "https://github.com/cocodrips/negima", "project_name": "negima", "stargazers_count": 14, "source": "GitHub", "score": -0.2780901669515906, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Term Extraction", "Information Extraction & Text Mining" ] }, { "description": "Joint source channel model for Japanese Kana Kanji conversion, Chinese pinyin input and CJE mixed input.", "url": "https://github.com/yohokuno/jsc", "project_name": "jsc", "stargazers_count": 14, "source": "GitHub", "score": -0.2780901669515906, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Syntactic Text Processing", "Language Models" ] }, { "description": "Coursera Corpus Mining and Multistage Fine-Tuning for Improving Lectures Translation", "url": "https://github.com/shyyhs/CourseraParallelCorpusMining", "project_name": "CourseraParallelCorpusMining", "stargazers_count": 14, "source": "GitHub", "score": -0.2780901669515906, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Language Models", "Semantic Text Processing" ] }, { "description": "NDL古典籍OCR学習用データセット(みんなで翻刻加工データ)", "url": "https://github.com/ndl-lab/ndl-minhon-ocrdataset", "project_name": "ndl-minhon-ocrdataset", "stargazers_count": 14, "source": "GitHub", "score": -0.2780901669515906, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "Convert English alphabet to Katakana", "url": "https://github.com/shihono/alphabet2kana", "project_name": "alphabet2kana", "stargazers_count": 13, "source": "GitHub", "score": -0.280922358673208, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "Entitypedia is an Extended Named Entity Dictionary from Wikipedia.", "url": "https://github.com/chakki-works/entitypedia", "project_name": "entitypedia", "stargazers_count": 13, "source": "GitHub", "score": -0.280922358673208, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Syntactic Text Processing", "Named Entity Recognition" ] }, { "description": "日本語データセットでのqlora instruction tuning学習サンプルコード", "url": "https://github.com/sonoisa/clip-japanese", "project_name": "clip-japanese", "stargazers_count": 13, "source": "GitHub", "score": -0.280922358673208, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Low-Resource NLP" ] }, { "description": "Anthy is a kana-kanji conversion engine for Japanese. 
It converts roma-ji to kana, and the kana text to a mixed kana and kanji.", "url": "https://github.com/netsphere-labs/anthy", "project_name": "anthy", "stargazers_count": 13, "source": "GitHub", "score": -0.280922358673208, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "A Web Crawled Corpus for Japanese-Chinese NMT", "url": "https://github.com/zhang-jinyi/Web-Crawled-Corpus-for-Japanese-Chinese-NMT", "project_name": "Web-Crawled-Corpus-for-Japanese-Chinese-NMT", "stargazers_count": 13, "source": "GitHub", "score": -0.280922358673208, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "歴史資料の市民参加型翻刻プラットフォーム「みんなで翻刻」のテキストデータ置き場です。 / Transcription texts created on Minna de Honkoku (https://honkoku.org), a crowdsourced transcription platform for historical Japanese documents.", "url": "https://github.com/yuta1984/honkoku-data", "project_name": "honkoku-data", "stargazers_count": 13, "source": "GitHub", "score": -0.280922358673208, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "デジタル化資料から作成したOCRテキストデータのngram頻度統計情報のデータセット", "url": "https://github.com/ndl-lab/ndlngramdata", "project_name": "ndlngramdata", "stargazers_count": 13, "source": "GitHub", "score": -0.280922358673208, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Japanese version of LIWC2015", "url": "https://github.com/tasukuigarashi/j-liwc2015", "project_name": "j-liwc2015", "stargazers_count": 13, "source": "GitHub", "score": -0.280922358673208, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "青空文庫及びサピエの点字データから作成した振り仮名のデータセット", "url": "https://github.com/ndl-lab/huriganacorpus-aozora", "project_name": "huriganacorpus-aozora", "stargazers_count": 13, "source": "GitHub", "score": -0.280922358673208, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "OCR処理プログラム研究開発事業において作成されたOCR学習用データセット", "url": "https://github.com/ndl-lab/pdmocrdataset-part2", "project_name": "pdmocrdataset-part2", "stargazers_count": 13, "source": "GitHub", "score": -0.280922358673208, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Visual Data in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Japanese semantic test suite (FraCaS counterpart and extensions)", "url": "https://github.com/DaisukeBekki/JSeM", "project_name": "JSeM", "stargazers_count": 13, "source": "GitHub", "score": -0.280922358673208, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Semantic Parsing", "Semantic Similarity", "Semantic Text Processing" ] }, { "description": "Fine-Tuning Google Gemma for Japanese Instructions", "url": 
"https://github.com/qianniu95/gemma2_2b_finetune_jp_tutorial/blob/main/Gemma2_2b_Japanese_finetuning_colab.ipynb", "project_name": "Gemma2_2b_Japanese_finetuning_colab.ipynb", "stargazers_count": 13, "source": "GitHub", "score": -0.280922358673208, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "日本語文を音素列へ変換するスクリプト", "url": "https://github.com/korguchi/text2phoneme", "project_name": "text2phoneme", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "A powerful text cleaner for Japanese web texts", "url": "https://github.com/ku-nlp/text-cleaning", "project_name": "text-cleaning", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Generation" ] }, { "description": "An ambiguous subtitles dataset for visual scene-aware machine translation", "url": "https://github.com/ku-nlp/VISA", "project_name": "VISA", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Visual Data in NLP", "Text Generation", "Machine Translation", "Multimodality" ] }, { "description": "AllenNLP integration for Shiba: Japanese CANINE model", "url": "https://github.com/shunk031/allennlp-shiba-model", "project_name": "allennlp-shiba-model", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Representation Learning", "Syntactic Text Processing", "Text Segmentation", "Semantic Text Processing" ] }, { "description": "Example code for prefix-tuning GPT/GPT-NeoX models and for inference with trained prefixes", "url": "https://github.com/rinnakk/prefix-tuning-gpt", "project_name": "prefix-tuning-gpt", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Testing of Neural Topic Modeling for Japanese articles", "url": "https://github.com/m3yrin/NTM", "project_name": "NTM", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Topic Modeling" ] }, { "description": "[PyTorch] ClipCap for Japanese", "url": "https://github.com/Japanese-Image-Captioning/ClipCap-for-Japanese", "project_name": "ClipCap-for-Japanese", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "Japanese Spelling Error Corrector using BERT(Masked-Language Model). 
BERTに基づいて日本語校正", "url": "https://github.com/er-ri/bertjsc", "project_name": "bertjsc", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Modifying LAVIS' BLIP2 Q-former with models pretrained on Japanese datasets.", "url": "https://github.com/ZhaoPeiduo/BLIP2-Japanese", "project_name": "BLIP2-Japanese", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "軽量で規制も検閲もない日本語ローカル LLM『LightChatAssistant-TypeB』による、簡単なノベル生成アシスタントです。ローカル特権の永続生成 Generate forever で、当たりガチャを積み上げます。読み上げにも対応。", "url": "https://github.com/zuntan03/easynovelassistant", "project_name": "easynovelassistant", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "深層学習を用いたクイズ自動生成(日本語T5モデル)", "url": "https://github.com/sonoisa/deep-question-generation", "project_name": "deep-question-generation", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Mecab wrapper to generate furigana readings.", "url": "https://github.com/ajatt-tools/mecab_controller", "project_name": "mecab_controller", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "Trials of pre-trained BERT models for the medical domain in Japanese.", "url": "https://github.com/ou-medinfo/medbertjp", "project_name": "medbertjp", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Pretrained models, codes and guidances to pretrain official ALBERT(https://github.com/google-research/albert) on Japanese Wikipedia Resources", "url": "https://github.com/nknytk/albert-japanese-tinysegmenter", "project_name": "albert-japanese-tinysegmenter", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "EasyLightChatAssistant は軽量で検閲や規制のないローカル日本語モデルのLightChatAssistant を、KoboldCpp で簡単にお試しする環境です。", "url": "https://github.com/zuntan03/easylightchatassistant", "project_name": "easylightchatassistant", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Azure OpenAIを活用したアプリケーション実装のリファレンスを目的として、アプリのサンプル(リファレンスアーキテクチャ、サンプルコードとデプロイ手順)を無償提供しています。", "url": 
"https://github.com/azure-samples/jp-azureopenai-samples", "project_name": "jp-azureopenai-samples", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "JGLUE: Japanese General Language Understanding Evaluation for huggingface datasets", "url": "https://github.com/shunk031/huggingface-datasets_JGLUE", "project_name": "huggingface-datasets_JGLUE", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Responsible & Trustworthy NLP", "Multimodality", "Low-Resource NLP", "Explainability & Interpretability in NLP" ] }, { "description": "JCommonsenseMorality is a dataset created through crowdsourcing that reflects the commonsense morality of Japanese annotators.", "url": "https://github.com/Language-Media-Lab/commonsense-moral-ja", "project_name": "commonsense-moral-ja", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Reasoning", "Commonsense Reasoning" ] }, { "description": "「自然言語処理の教科書」サポートサイト", "url": "https://github.com/mamorlis/nlpbook", "project_name": "nlpbook", "stargazers_count": 12, "source": "GitHub", "score": -0.2837545503948253, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Python binding for Jagger(C++ implementation of Pattern-based Japanese Morphological Analyzer)", "url": "https://github.com/lighttransport/jagger-python", "project_name": "jagger-python", "stargazers_count": 11, "source": "GitHub", "score": -0.28658674211644264, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "Code to perform finetuning of the BERT model. 
BERTモデルのファインチューニングで固有表現抽出用タスクのモデルを作成・使用するサンプルです", "url": "https://github.com/ken11/bert-japanese-ner-finetuning", "project_name": "bert-japanese-ner-finetuning", "stargazers_count": 11, "source": "GitHub", "score": -0.28658674211644264, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "script to evaluate pre-trained Japanese word2vec model on Japanese similarity dataset", "url": "https://github.com/shihono/evaluate_japanese_w2v", "project_name": "evaluate_japanese_w2v", "stargazers_count": 11, "source": "GitHub", "score": -0.28658674211644264, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Representation Learning", "Language Models", "Semantic Similarity", "Semantic Text Processing" ] }, { "description": "A curated list of resources dedicated to Python libraries of NLP for Japanese", "url": "https://github.com/ikegami-yukino/asa-python", "project_name": "asa-python", "stargazers_count": 11, "source": "GitHub", "score": -0.28658674211644264, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Morphology" ] }, { "description": "Japanese Entity Linker.", "url": "https://github.com/izuna385/jel", "project_name": "jel", "stargazers_count": 11, "source": "GitHub", "score": -0.28658674211644264, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Knowledge Representation", "Semantic Text Processing" ] }, { "description": "条件付確率場とベイズ階層言語モデルの統合による半教師あり形態素解析", "url": "https://github.com/musyoku/python-npycrf", "project_name": "python-npycrf", "stargazers_count": 11, "source": "GitHub", "score": -0.28658674211644264, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Low-Resource NLP" ] }, { "description": "About", "url": "https://github.com/shibuiwilliam/aozora_classification", "project_name": "aozora_classification", "stargazers_count": 11, "source": "GitHub", "score": -0.28658674211644264, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese verb/adjective inflections tool", "url": "https://github.com/SmashinFries/PyKatsuyou", "project_name": "PyKatsuyou", "stargazers_count": 11, "source": "GitHub", "score": -0.28658674211644264, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Morphology" ] }, { "description": "Japanese / English Bilingual LLM", "url": "https://github.com/shisa-ai/shisa-v2", "project_name": "shisa-v2", "stargazers_count": 11, "source": "GitHub", "score": -0.28658674211644264, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "英単語から読みを推測するライブラリ。", "url": "https://github.com/voicevox/kanalizer", "project_name": "kanalizer", "stargazers_count": 11, "source": "GitHub", "score": -0.28658674211644264, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { 
"description": "最新の日本語Wikipediaのダンプデータから,MeCabを用いてIPA辞書と最新のNeologd辞書の両方で形態素解析を実施し,その結果に基づいた word2vec,fastText,GloVeの単語分散表現を学習するためのスクリプト", "url": "https://github.com/kamigaito/jawiki_word_vector_updater", "project_name": "jawiki_word_vector_updater", "stargazers_count": 11, "source": "GitHub", "score": -0.28658674211644264, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Code to train Sentence BERT Japanese model for Hugging Face Model Hub", "url": "https://github.com/colorfulscoop/sbert-ja", "project_name": "sbert-ja", "stargazers_count": 11, "source": "GitHub", "score": -0.28658674211644264, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "(๑ ᴖ ᴑ ᴖ ๑)みょんかおもじ(旧Kaomoji_proj)はMicrosoft社の入力ソフト、Microsoft IME向けの顔文字の辞書を作成するプロジェクトです。", "url": "https://github.com/mtripg6666tdr/Kaomoji_proj", "project_name": "Kaomoji_proj", "stargazers_count": 11, "source": "GitHub", "score": -0.28658674211644264, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "This is a Japanese text corpus that consists of Wikipedia articles with various linguistic annotations.", "url": "https://github.com/ku-nlp/WikipediaAnnotatedCorpus", "project_name": "WikipediaAnnotatedCorpus", "stargazers_count": 11, "source": "GitHub", "score": -0.28658674211644264, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Knowledge Representation", "Tagging", "Semantic Text Processing", "Morphology", "Annotation and Dataset Development" ] }, { "description": "Japanese-RP-BenchはLLMの日本語ロールプレイ能力を測定するためのベンチマークです。", "url": "https://github.com/Aratako/Japanese-RP-Bench", "project_name": "Japanese-RP-Bench", "stargazers_count": 11, "source": "GitHub", "score": -0.28658674211644264, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [] }, { "description": "文法誤り訂正に関する日本語文献を収集・分類するためのリポジトリ", "url": "https://github.com/gotutiyan/GEC-Info-ja", "project_name": "GEC-Info-ja", "stargazers_count": 11, "source": "GitHub", "score": -0.28658674211644264, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Easy wrapper for the postal code data of Japan", "url": "https://github.com/nagataaaas/Jusho", "project_name": "Jusho", "stargazers_count": 10, "source": "GitHub", "score": -0.28941893383806, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "A processor for KyotoCorpus, KWDLC, and AnnotatedFKCCorpus", "url": "https://github.com/ku-nlp/kyoto-reader", "project_name": "kyoto-reader", "stargazers_count": 10, "source": "GitHub", "score": -0.28941893383806, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Tagging" ] }, { "description": "Zunda: Japanese Enhanced Modality Analyzer client for Python.", "url": "https://github.com/ikegami-yukino/zunda-python", "project_name": "zunda-python", "stargazers_count": 10, 
"source": "GitHub", "score": -0.28941893383806, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Semantic Parsing", "Semantic Text Processing" ] }, { "description": "A summarizer for Japanese articles.", "url": "https://github.com/ryuryukke/japanese_summarizer", "project_name": "japanese_summarizer", "stargazers_count": 10, "source": "GitHub", "score": -0.28941893383806, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Summarization", "Text Generation" ] }, { "description": "wikipedia 日本語の文を、各種日本語の embeddings や faiss index へと変換するスクリプト等。", "url": "https://github.com/hotchpotch/wikipedia-passages-jawiki-embeddings-utils", "project_name": "wikipedia-passages-jawiki-embeddings-utils", "stargazers_count": 10, "source": "GitHub", "score": -0.28941893383806, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Representation Learning", "Semantic Text Processing" ] }, { "description": "A morphological analyzer using mecab dictionary", "url": "https://github.com/nakagami/awabi", "project_name": "awabi", "stargazers_count": 10, "source": "GitHub", "score": -0.28941893383806, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "[Mirror] CLI program for transliterating romaji text to either hiragana or katakana", "url": "https://github.com/gbrlsnchs/kana", "project_name": "kana", "stargazers_count": 10, "source": "GitHub", "score": -0.28941893383806, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Paraphrasing", "Text Generation", "Text Normalization" ] }, { "description": "Japanese glossator for assisted reading of text using Ichiran", "url": "https://github.com/Netdex/niinii", "project_name": "niinii", "stargazers_count": 10, "source": "GitHub", "score": -0.28941893383806, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Sentiment Analysis in Japanese. 
sentiment_ja with JavaScript", "url": "https://github.com/otodn/sentiment_ja_js", "project_name": "sentiment_ja_js", "stargazers_count": 10, "source": "GitHub", "score": -0.28941893383806, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Sentiment Analysis" ] }, { "description": "Name Searcher in Japanese", "url": "https://github.com/kuniwak/name", "project_name": "name", "stargazers_count": 10, "source": "GitHub", "score": -0.28941893383806, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Information Retrieval" ] }, { "description": "The open-source Mozc dictionary converted into the MeCab dictionary format.", "url": "https://github.com/akirakubo/mecab-mozcdic", "project_name": "mecab-mozcdic", "stargazers_count": 10, "source": "GitHub", "score": -0.28941893383806, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Support page for the book \"Natural Language Processing with Deep Learning\" (Kyoritsu Shuppan)", "url": "https://github.com/python-nlp-book/python-nlp-book", "project_name": "python-nlp-book", "stargazers_count": 10, "source": "GitHub", "score": -0.28941893383806, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Japanese text parser to separate Hiragana/Katakana string into morae (syllables).", "url": "https://github.com/tachi-hi/jamorasep", "project_name": "jamorasep", "stargazers_count": 9, "source": "GitHub", "score": -0.29225112555967736, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing" ] }, { "description": "A tool for automatic English to Katakana conversion", "url": "https://github.com/Patchethium/e2k", "project_name": "e2k", "stargazers_count": 9, "source": "GitHub", "score": -0.29225112555967736, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Comparison of Japanese Sentence Segmentation Tools", "url": "https://github.com/hkiyomaru/ja-senter-benchmark", "project_name": "ja-senter-benchmark", "stargazers_count": 9, "source": "GitHub", "score": -0.29225112555967736, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "To investigate various DNN text classifiers including MLP, CNN, RNN, BERT approaches.", "url": "https://github.com/Masao-Taketani/japanese_text_classification", "project_name": "japanese_text_classification", "stargazers_count": 9, "source": "GitHub", "score": -0.29225112555967736, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Information Retrieval", "Text Classification", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese version of DNorm", "url": "https://github.com/sociocom/DNorm-J", "project_name": "DNorm-J", "stargazers_count": 9, "source": "GitHub", "score": -0.29225112555967736, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text
Processing", "Text Normalization" ] }, { "description": "EventGraph is a development platform for high-level NLP applications in Japanese.", "url": "https://github.com/ku-nlp/pyknp-eventgraph", "project_name": "pyknp-eventgraph", "stargazers_count": 9, "source": "GitHub", "score": -0.29225112555967736, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Event Extraction", "Structured Data in NLP" ] }, { "description": "WordCloudでの日本語文章をMecab(形態素解析エンジン)を使用せずに形態素解析チックな表示を実現するスクリプト", "url": "https://github.com/aocattleya/WordCloud-Japanese", "project_name": "WordCloud-Japanese", "stargazers_count": 9, "source": "GitHub", "score": -0.29225112555967736, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "日本語ワードネットを利用したDBアクセスライブラリ", "url": "https://github.com/hiraokusky/snark", "project_name": "snark", "stargazers_count": 9, "source": "GitHub", "score": -0.29225112555967736, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "JaSPICE: Automatic Evaluation Metric Using Predicate-Argument Structures for Image Captioning Models", "url": "https://github.com/keio-smilab23/JaSPICE", "project_name": "JaSPICE", "stargazers_count": 9, "source": "GitHub", "score": -0.29225112555967736, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Captioning", "Text Generation", "Multimodality" ] }, { "description": "We pretrained a RoBERTa-based Japanese masked language model on paper abstracts from the academic database CiNii Articles.", "url": "https://github.com/EhimeNLP/AcademicRoBERTa", "project_name": "AcademicRoBERTa", "stargazers_count": 9, "source": "GitHub", "score": -0.29225112555967736, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "AMI Meeting Parallel Corpus", "url": "https://github.com/tsuruoka-lab/AMI-Meeting-Parallel-Corpus", "project_name": "AMI-Meeting-Parallel-Corpus", "stargazers_count": 9, "source": "GitHub", "score": -0.29225112555967736, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "A Japanese Corpus of Many Specialized Domains (JCMS)", "url": "https://github.com/shigashiyama/jcms", "project_name": "jcms", "stargazers_count": 9, "source": "GitHub", "score": -0.29225112555967736, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Semantic Text Processing", "Morphology", "Annotation and Dataset Development" ] }, { "description": "Mecon Audio(Medical Conference Audio)は厚生労働省主催の先進医療会議の議事録の読み上げデータセットです。", "url": "https://github.com/elith-co-jp/meconaudio", "project_name": "meconaudio", "stargazers_count": 9, "source": "GitHub", "score": -0.29225112555967736, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Speech & Audio in NLP", "Multimodality" ] }, { "description": 
"検索拡張(RAG)評価のための日本語Q&Aデータセット", "url": "https://github.com/hotchpotch/jqara", "project_name": "jqara", "stargazers_count": 9, "source": "GitHub", "score": -0.29225112555967736, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "日本語情報検索評価のための小規模でカジュアルなWebタイトルと概要のデータセット", "url": "https://github.com/hotchpotch/jacwir", "project_name": "jacwir", "stargazers_count": 9, "source": "GitHub", "score": -0.29225112555967736, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "Various annotations of Manga109 dataset", "url": "https://github.com/manga109/public-annotations", "project_name": "public-annotations", "stargazers_count": 9, "source": "GitHub", "score": -0.29225112555967736, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "NDLOCR用テキスト認識モジュール", "url": "https://github.com/ndl-lab/text_recognition", "project_name": "text_recognition", "stargazers_count": 8, "source": "GitHub", "score": -0.29508331728129467, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "Lookup japanese words by radical patterns", "url": "https://github.com/itayperl/kantan", "project_name": "kantan", "stargazers_count": 8, "source": "GitHub", "score": -0.29508331728129467, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Linguistics & Cognitive NLP" ] }, { "description": "Generates plain or tokenized text files from the Aozora Bunko", "url": "https://github.com/borh/aozora-corpus-generator", "project_name": "aozora-corpus-generator", "stargazers_count": 8, "source": "GitHub", "score": -0.29508331728129467, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Morphology" ] }, { "description": "A Japanese Medical Information Extraction Toolkit", "url": "https://github.com/racerandom/JaMIE", "project_name": "JaMIE", "stargazers_count": 8, "source": "GitHub", "score": -0.29508331728129467, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Term Extraction", "Information Extraction & Text Mining" ] }, { "description": "mecab-ipadic seed dictionary reader", "url": "https://github.com/takuyaa/mecab-ipadic-seed", "project_name": "mecab-ipadic-seed", "stargazers_count": 8, "source": "GitHub", "score": -0.29508331728129467, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "This is a repository for the AI LAB article \"係り受けに基づく日本語単語埋込 (Dependency-based Japanese Word Embeddings)\" ( Article URL https://ai-lab.lapras.com/nlp/japanese-word-embedding/)", "url": "https://github.com/lapras-inc/dependency-based-japanese-word-embeddings", "project_name": "dependency-based-japanese-word-embeddings", "stargazers_count": 8, "source": "GitHub", "score": -0.29508331728129467, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", 
"Syntactic Text Processing", "Syntactic Parsing", "Semantic Text Processing" ] }, { "description": "Word Familiarity Rate for 'Word List by Semantic Principles (WLSP)'", "url": "https://github.com/masayu-a/WLSP-familiarity", "project_name": "WLSP-familiarity", "stargazers_count": 8, "source": "GitHub", "score": -0.29508331728129467, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Knowledge Representation", "Semantic Text Processing" ] }, { "description": "Code for J-CRe3 experiments (Ueda et al., LREC-COLING, 2024)", "url": "https://github.com/riken-grp/J-CRe3", "project_name": "J-CRe3", "stargazers_count": 8, "source": "GitHub", "score": -0.29508331728129467, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [] }, { "description": "Simple Python package (CLI/Python API) for getting japanese readings (yomigana) and accents using MeCab.", "url": "https://github.com/34j/mecab-text-cleaner", "project_name": "mecab-text-cleaner", "stargazers_count": 7, "source": "GitHub", "score": -0.297915509002912, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "Data Augmentation for Japanese Text on AugLy", "url": "https://github.com/chck/AugLy-jp", "project_name": "AugLy-jp", "stargazers_count": 7, "source": "GitHub", "score": -0.297915509002912, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Low-Resource NLP" ] }, { "description": "Juliusを使ったセグメンテーション支援ツール", "url": "https://github.com/Hiroshiba/julius4seg", "project_name": "julius4seg", "stargazers_count": 7, "source": "GitHub", "score": -0.297915509002912, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "Magpieという手法とNemotron-4-340B-Instructを用いて合成対話データセットを作るコード", "url": "https://github.com/aratako/magpie-nemotron", "project_name": "magpie-nemotron", "stargazers_count": 7, "source": "GitHub", "score": -0.297915509002912, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "Whisperのデコーダをllm-jp-1.3b-v1.0に置き換えた音声認識モデルを学習させるためのコード", "url": "https://github.com/tosiyuki/llm-jp-asr", "project_name": "llm-jp-asr", "stargazers_count": 7, "source": "GitHub", "score": -0.297915509002912, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "VITSによるテキスト読み上げ器&ボイスチェンジャー", "url": "https://github.com/zassou65535/vits", "project_name": "vits", "stargazers_count": 7, "source": "GitHub", "score": -0.297915509002912, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multimodality" ] }, { "description": "Colaboratory上でAsagi(合成データセットを活用した大規模日本語VLM)をお試しするサンプル", "url": "https://github.com/kazuhito00/asagi-vlm-colaboratory-sample", "project_name": "asagi-vlm-colaboratory-sample", "stargazers_count": 7, "source": "GitHub", "score": -0.297915509002912, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "形態素解析器性能評価システム MevAL", "url": "https://github.com/teru-oka-1933/meval", "project_name": "meval", 
"stargazers_count": 7, "source": "GitHub", "score": -0.297915509002912, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "Sudachi向け万病辞書", "url": "https://github.com/yagays/manbyo-sudachi", "project_name": "manbyo-sudachi", "stargazers_count": 7, "source": "GitHub", "score": -0.297915509002912, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "AWSサービス名のGoogle日本語入力向けの辞書", "url": "https://github.com/konyu/aws_dic_for_google_ime", "project_name": "aws_dic_for_google_ime", "stargazers_count": 7, "source": "GitHub", "score": -0.297915509002912, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [] }, { "description": "Allows Sudachi to normalize from hiragana to katakana from any compound word list", "url": "https://github.com/po3rin/hirakanadic", "project_name": "hirakanadic", "stargazers_count": 7, "source": "GitHub", "score": -0.297915509002912, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "なんとかデータベース( https://supleks.jp/ )からのスクレイピングツールと収集データ", "url": "https://github.com/nuko-yokohama/ramendb", "project_name": "ramendb", "stargazers_count": 7, "source": "GitHub", "score": -0.297915509002912, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Yet Another Japanese Dependency Structure Analyzer", "url": "https://github.com/ikegami-yukino/cabocha", "project_name": "cabocha", "stargazers_count": 6, "source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing" ] }, { "description": "A tool to perform sentence segmentation on Japanese text", "url": "https://github.com/mkartawijaya/hasami", "project_name": "hasami", "stargazers_count": 6, "source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "Japanese Text Segmenter for Python written in Rust", "url": "https://github.com/alinear-corp/kuzukiri", "project_name": "kuzukiri", "stargazers_count": 6, "source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "Converts character span label information to tokenized text-based label information.", "url": "https://github.com/ken11/noyaki", "project_name": "noyaki", "stargazers_count": 6, "source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Named Entity Recognition" ] }, { "description": "A PyTorch implementation of the Japanese Predicate-Argument Structure (PAS) analyser presented in the paper of Matsubayashi & Inui (2018) with some improvements.", "url": "https://github.com/cl-tohoku/showcase", "project_name": "showcase", "stargazers_count": 6, 
"source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Argument Mining" ] }, { "description": "Feature annotator to morphemes and phrases based on KNP rule files (pure-Python)", "url": "https://github.com/megagonlabs/desuwa", "project_name": "desuwa", "stargazers_count": 6, "source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Tagging", "Morphology" ] }, { "description": "ChirpText is a collection of text processing tools for Python.", "url": "https://github.com/letuananh/chirptext", "project_name": "chirptext", "stargazers_count": 6, "source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation" ] }, { "description": "Japanese Wikipedia Cleaner", "url": "https://github.com/hppRC/jawiki-cleaner", "project_name": "jawiki-cleaner", "stargazers_count": 6, "source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Named Entity Recognition" ] }, { "description": "Mixture of Unigram Model and Infinite Mixture of Unigram Model in Python. (混合ユニグラムモデルと無限混合ユニグラムモデル)", "url": "https://github.com/KentoW/mixture-of-unigram-model", "project_name": "mixture-of-unigram-model", "stargazers_count": 6, "source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Cognitive Modeling", "Topic Modeling", "Syntactic Text Processing", "Linguistics & Cognitive NLP", "Language Models" ] }, { "description": "Reproduced package based on Masked Language Model Scoring (ACL2020).", "url": "https://github.com/Ryutaro-A/mlm-scoring-transformers", "project_name": "mlm-scoring-transformers", "stargazers_count": 6, "source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Ethical NLP", "Language Models", "Semantic Text Processing" ] }, { "description": "A tool for Japanese-English translation and English-Japanese translation by using FuguMT", "url": "https://github.com/tos-kamiya/tra-fugu", "project_name": "tra-fugu", "stargazers_count": 6, "source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation" ] }, { "description": "This project is a benchmarking tool for evaluating and comparing the performance of various Vision Language Models (VLMs). 
It uses two datasets: LLaVA-Bench-In-the-Wild and Japanese HERON Bench to measure model performance.", "url": "https://github.com/wandb/heron-vlm-leaderboard", "project_name": "heron-vlm-leaderboard", "stargazers_count": 6, "source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Language Models" ] }, { "description": "A quick reference for the material taught in Colloquial Kansai Japanese.", "url": "https://github.com/sethclydesdale/colloquial-kansai-dictionary", "project_name": "colloquial-kansai-dictionary", "stargazers_count": 6, "source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "This directory includes a giant Japanese-English subtitle corpus. The raw data comes from Stanford's JESC project.", "url": "https://github.com/DayuanJiang/giant_ja-en_parallel_corpus", "project_name": "giant_ja-en_parallel_corpus", "stargazers_count": 6, "source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "Japanese Translation of Winograd Schema Challenge", "url": "https://github.com/ku-nlp/Winograd-Schema-Challenge-Ja", "project_name": "Winograd-Schema-Challenge-Ja", "stargazers_count": 6, "source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Text Generation", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "A corpus of the general policy speeches delivered by the Prime Minister of Japan", "url": "https://github.com/yuukimiyo/GeneralPolicySpeechOfPrimeMinisterOfJapan", "project_name": "GeneralPolicySpeechOfPrimeMinisterOfJapan", "stargazers_count": 6, "source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Dialogue Commonsense Graph in Japanese", "url": "https://github.com/nlp-waseda/dcsg-ja", "project_name": "dcsg-ja", "stargazers_count": 6, "source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Reasoning", "Natural Language Interfaces", "Dialogue Systems & Conversational Agents", "Structured Data in NLP", "Commonsense Reasoning", "Multimodality" ] }, { "description": "Try Japanese textual inference on Google Colab", "url": "https://github.com/verypluming/JapaneseNLI", "project_name": "JapaneseNLI", "stargazers_count": 6, "source": "GitHub", "score": -0.3007477007245294, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "natsume-simple is a search system for Japanese dependency relations", "url": "https://github.com/borh-lab/natsume-simple", "project_name": "natsume-simple", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary,
Dictionary, and Language Input Method" ] }, { "description": "A program for fine-tuning BERT for Japanese named entity recognition", "url": "https://github.com/jurabiinc/bert-ner-japanese", "project_name": "bert-ner-japanese", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Dialogue Systems & Conversational Agents", "Language Models", "Semantic Text Processing" ] }, { "description": "A simple OCR for manga (Japanese traditional and Japanese vertical)", "url": "https://github.com/yisusdev2005/simple-ocr-for-manga", "project_name": "simple-ocr-for-manga", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "Masked Language Model-based Scoring for Japanese and Vietnamese", "url": "https://github.com/minhpqn/jmlm_scoring", "project_name": "jmlm_scoring", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Japanese Tokenizer for transformers library", "url": "https://github.com/retarfi/jptranstokenizer", "project_name": "jptranstokenizer", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models", "Tagging", "Semantic Text Processing", "Morphology" ] }, { "description": "How do different tokenizers perform on downstream tasks in scriptio continua languages?: A case study in Japanese (ACL SRW 2023)", "url": "https://github.com/hitachi-nlp/compare-ja-tokenizer", "project_name": "compare-ja-tokenizer", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Syntactic Text Processing", "Text Segmentation", "Annotation and Dataset Development" ] }, { "description": "Hidden Markov Model (HMM) and Infinite Hidden Markov Model (iHMM) in Python.", "url": "https://github.com/KentoW/hidden-markov-model", "project_name": "hidden-markov-model", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Language Models" ] }, { "description": "Ngram language model in Python.
", "url": "https://github.com/KentoW/Ngram-language-model", "project_name": "Ngram-language-model", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "This repository provides the code for Japanese NLI model, a fine-tuned masked language model.", "url": "https://github.com/CyberAgentAILab/japanese-nli-model", "project_name": "japanese-nli-model", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Software that generates a list of court precedent data by scraping the courts' website (https://www.courts.go.jp/index.html)", "url": "https://github.com/japanese-law-analysis/listup_precedent", "project_name": "listup_precedent", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Well, a different Japanese word every day.", "url": "https://github.com/LuanRT/Japanese-Word-Of-The-Day", "project_name": "Japanese-Word-Of-The-Day", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Semantic Text Processing" ] }, { "description": "Kanji transliteration to hiragana/katakana/romaji, in Go", "url": "https://github.com/sarumaj/go-kakasi", "project_name": "go-kakasi", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "elmo-japanese", "url": "https://github.com/cl-tohoku/elmo-japanese", "project_name": "elmo-japanese", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Multilinguality", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Developing the foundational components of an AITuber", "url": "https://github.com/M-gen/AITuberDegikkoMirii", "project_name": "AITuberDegikkoMirii", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "A Chrome extension to replace the selected text softly", "url": "https://github.com/kmizu/sftly-replace", "project_name": "sftly-replace", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Paraphrasing", "Text Generation", "Annotation and Dataset Development" ] }, { "description": "A Japanese Kanji Flashcard App built using Python and Langchain, enhanced with the intelligence of GPT-4.", "url": "https://github.com/adilmoujahid/kanji-flashcard-app-gpt4", "project_name": "kanji-flashcard-app-gpt4", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Language
Models" ] }, { "description": "日本語タスクにおけるLLMを用いた疑似学習データ生成の検討", "url": "https://github.com/retrieva/japagen", "project_name": "japagen", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Japanese-English Dictionary using jisho.org with audio, csv export of entries, and preview of dictionary sites.", "url": "https://github.com/chrisgrieser/alfred-japanese-dictionary", "project_name": "alfred-japanese-dictionary", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "訪日観光客向けメディアMATCHAの記事から、日本語のテキスト平易化のためのデータセットを構築しました。", "url": "https://github.com/ehimenlp/matcha", "project_name": "matcha", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multimodality" ] }, { "description": "This repository contains the Japanese–English bilingual chat corpus BPersona-chat published in the paper Chat Translation Error Detection for Assisting Cross-lingual Communications at AACL-IJCNLP 2022's Workshop Eval4NLP 2022.", "url": "https://github.com/cl-tohoku/BPersona-chat", "project_name": "BPersona-chat", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Cross-Lingual Transfer", "Annotation and Dataset Development" ] }, { "description": "Chunked word annotation for ITA corpus", "url": "https://github.com/shirayu/ita-corpus-chuwa", "project_name": "ita-corpus-chuwa", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Linguistics & Cognitive NLP", "Annotation and Dataset Development" ] }, { "description": "A cross-linguistic study of pronoun substitutes and address terms", "url": "https://github.com/matbahasa/ProSub", "project_name": "ProSub", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Linguistics & Cognitive NLP", "Linguistic Theories", "Annotation and Dataset Development" ] }, { "description": "Dataset of paper \"Verification of Chain-of-Thought Prompting in Japanese\"", "url": "https://github.com/nlp-waseda/chain-of-thought-ja-dataset", "project_name": "chain-of-thought-ja-dataset", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Responsible & Trustworthy NLP", "Language Models", "Low-Resource NLP" ] }, { "description": "A Dataset of Japanese Lexical Complexity for Non-Native Readers", "url": "https://github.com/naist-nlp/jalecon", "project_name": "jalecon", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, 
"latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "AllenNLP-NER-ja: AllenNLP による日本語を対象とした固有表現抽出", "url": "https://github.com/shunk031/allennlp-NER-ja", "project_name": "allennlp-NER-ja", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Named Entity Recognition" ] }, { "description": "Experiment for Japanese Text classification using chariot and PyTorch", "url": "https://github.com/ymym3412/chariot-PyTorch-Japanese-text-classification", "project_name": "chariot-PyTorch-Japanese-text-classification", "stargazers_count": 5, "source": "GitHub", "score": -0.30357989244614675, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Information Retrieval", "Text Classification" ] }, { "description": "Python binding for J.DepP(C++ implementation of Japanese Dependency Parsers)", "url": "https://github.com/lighttransport/jdepp-python", "project_name": "jdepp-python", "stargazers_count": 4, "source": "GitHub", "score": -0.30641208416776405, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing", "Tagging" ] }, { "description": "Yet Another Fast Japanese String Converter", "url": "https://github.com/Hizuru3/python-habachen", "project_name": "python-habachen", "stargazers_count": 4, "source": "GitHub", "score": -0.30641208416776405, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "A BERT model for nagisa", "url": "https://github.com/taishi-i/nagisa_bert", "project_name": "nagisa_bert", "stargazers_count": 4, "source": "GitHub", "score": -0.30641208416776405, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "日本語文を絵文字だけの文に変換するなにか", "url": "https://github.com/mkan0141/toEmoji", "project_name": "toEmoji", "stargazers_count": 4, "source": "GitHub", "score": -0.30641208416776405, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "A Japanese inflection converter", "url": "https://github.com/hkiyomaru/jinf", "project_name": "jinf", "stargazers_count": 4, "source": "GitHub", "score": -0.30641208416776405, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Morphology" ] }, { "description": "日本の住所を都道府県/市区町村/町名/その他に分割するライブラリです", "url": "https://github.com/yuukitoriyama/japanese-address-parser", "project_name": "japanese-address-parser", "stargazers_count": 4, "source": "GitHub", "score": -0.30641208416776405, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese text normalizer for mecab-neologd", "url": "https://github.com/ikegami-yukino/neologdn-java", "project_name": "neologdn-java", "stargazers_count": 4, "source": "GitHub", 
"score": -0.30641208416776405, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "MonsterHunterのユーザー辞書を作りたい…", "url": "https://github.com/utubo/mh-dict-jp", "project_name": "mh-dict-jp", "stargazers_count": 4, "source": "GitHub", "score": -0.30641208416776405, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "google-vs-deepl-je", "url": "https://github.com/Tzawa/google-vs-deepl-je", "project_name": "google-vs-deepl-je", "stargazers_count": 4, "source": "GitHub", "score": -0.30641208416776405, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation" ] }, { "description": "Japanese-Wikipedia Wikification Corpus", "url": "https://github.com/wikiwikification/jawikicorpus", "project_name": "jawikicorpus", "stargazers_count": 4, "source": "GitHub", "score": -0.30641208416776405, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "WRIME for huggingface datasets", "url": "https://github.com/shunk031/huggingface-datasets_wrime", "project_name": "huggingface-datasets_wrime", "stargazers_count": 4, "source": "GitHub", "score": -0.30641208416776405, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "e-Gov 法令検索で配布されている「全ての法令データ」を定期的にダウンロードし、アーカイブしています", "url": "https://github.com/kissge/elaws-history", "project_name": "elaws-history", "stargazers_count": 4, "source": "GitHub", "score": -0.30641208416776405, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "This repository provides Python implementation for building Swallow Corpus Version 1, a large Japanese web corpus (Okazaki et al., 2024), from Common Crawl archives.", "url": "https://github.com/swallow-llm/swallow-corpus", "project_name": "swallow-corpus", "stargazers_count": 4, "source": "GitHub", "score": -0.30641208416776405, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Convert japanese kana from ba-bi-bu-be-bo into va-vi-vu-ve-vo", "url": "https://github.com/eggplants/wiredify", "project_name": "wiredify", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing" ] }, { "description": "Utsuho is a Python module that facilitates bidirectional conversion between half-width katakana and full-width katakana in Japanese.", "url": "https://github.com/juno-rmks/utsuho", "project_name": "utsuho", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "T5による会話生成", "url": "https://github.com/Jinyamyzk/t5_japanese_dialogue_generation", "project_name": 
"t5_japanese_dialogue_generation", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Dialogue Systems & Conversational Agents", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Example codes for Japanese Realistic Textual Entailment Corpus", "url": "https://github.com/megagonlabs/jrte-corpus_example", "project_name": "jrte-corpus_example", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Reasoning", "Textual Inference", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "Code to perform finetuning of the mBART model.", "url": "https://github.com/ken11/mbart-finetuning", "project_name": "mbart-finetuning", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "Twitter日本語評判分析データセットのためのツイートダウンローダ", "url": "https://github.com/tatHi/tweet_extructor", "project_name": "tweet_extructor", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Retrieval", "Text Classification", "Sentiment Analysis" ] }, { "description": "A PyTorch Implementation of japanese chatbot using BERT and Transformer's decoder", "url": "https://github.com/CjangCjengh/japanese_chatbot", "project_name": "japanese_chatbot", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents", "Language Models", "Semantic Text Processing" ] }, { "description": "A repo for evaluating Japanese LLMs ・ 日本語LLMを評価するレポ", "url": "https://github.com/lightblue-tech/japanese_llm_eval", "project_name": "japanese_llm_eval", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "A state-of-the-art open-source Japanese <--> English machine translation system based on the latest NMT research.", "url": "https://github.com/matthewbieda/jp-translate.cloud", "project_name": "jp-translate.cloud", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "OpenJTalkのユーザ辞書をGUIで追加するアプリ", "url": "https://github.com/massao000/add-dictionary", "project_name": "add-dictionary", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "JATTS: Japanese TTS (for research)", "url": "https://github.com/unilight/jatts", "project_name": "jatts", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, 
"latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "A Tantivy tokenizer using Vibrato.", "url": "https://github.com/akr4/tantivy-vibrato", "project_name": "tantivy-vibrato", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "Comparison code of various tokenizers", "url": "https://github.com/legalforce-research/tokenizer-speed-bench", "project_name": "tokenizer-speed-bench", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Syntactic Text Processing", "Text Segmentation", "Low-Resource NLP" ] }, { "description": "Learn Japanese vocabs 🇯🇵 by doing quizzes on CLI!", "url": "https://github.com/tysonwu/japanki", "project_name": "japanki", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "a User-Extensible Morphological Analyzer for Japanese. 日本語形態素解析システム", "url": "https://github.com/thammin/juman-bin", "project_name": "juman-bin", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "GPT-2 Japanese model for HuggingFace's transformers", "url": "https://github.com/colorfulscoop/gpt-ja", "project_name": "gpt-ja", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "This repository contains source dictionary files to build dictionaries for JUMAN and Juman++.", "url": "https://github.com/ku-nlp/JumanDIC", "project_name": "JumanDIC", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "漢字の書き順(筆順)・読み方・画数・部首・用例・成り立ちを調べるための漢字辞書です。Unicode 15.1 のすべての漢字 98,682字を収録しています。", "url": "https://github.com/marmooo/kanji-dict", "project_name": "kanji-dict", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Kotlin かな漢字変換プログラム", "url": "https://github.com/KazumaProject/kotlin-kana-kanji-converter", "project_name": "kotlin-kana-kanji-converter", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { 
"description": "Small Japanese-English Subtitle Corpus", "url": "https://github.com/yusugomori/jesc_small", "project_name": "jesc_small", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "日本語の対話データ for seq2seq etc", "url": "https://github.com/MokkeMeguru/japanese-corpus", "project_name": "japanese-corpus", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Dialogue Response Generation", "Dialogue Systems & Conversational Agents", "Text Generation" ] }, { "description": "An extension of the BSD corpus with audio and speaker attribute information", "url": "https://github.com/ku-nlp/speechBSD", "project_name": "speechBSD", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "言語処理学会年次大会講演の全リスト・機械可読版など", "url": "https://github.com/whym/anlp-jp-history", "project_name": "anlp-jp-history", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "日本の国会議員のデータ", "url": "https://github.com/sugi2000/Data-on-Japanese-Diet-Members", "project_name": "Data-on-Japanese-Diet-Members", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [] }, { "description": "日本語学習者のための科学技術業界でよく使われる片仮名と元の単語対照表", "url": "https://github.com/laoshubaby/japanese-technical-dict", "project_name": "japanese-technical-dict", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [] }, { "description": "日本語の言語モデルのハンズオン資料です", "url": "https://github.com/hnishi/handson-language-models", "project_name": "handson-language-models", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "日本語情報検索チュートリアル", "url": "https://github.com/mpkato/japanese-ir-tutorial", "project_name": "japanese-ir-tutorial", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Rによる自然言語処理・テキスト分析の練習", "url": "https://github.com/paithiov909/textmining-ja", "project_name": "textmining-ja", "stargazers_count": 3, "source": "GitHub", "score": -0.3092442758893814, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Dialogue Systems & Conversational Agents", "Low-Resource NLP" ] }, { "description": "Quickly preprocesses Japanese text using NLP/NER 
from SpaCy for Japanese translation or other NLP tasks.", "url": "https://github.com/bikatr7/kairyou", "project_name": "kairyou", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Information Extraction & Text Mining", "Named Entity Recognition", "Text Generation", "Machine Translation" ] }, { "description": "Deploying sentiment analysis server with FastAPI and BERT", "url": "https://github.com/izuna385/Japanese-BERT-Sentiment-Analyzer", "project_name": "Japanese-BERT-Sentiment-Analyzer", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Language Models", "Semantic Text Processing", "Sentiment Analysis" ] }, { "description": "Ishi: A volition classifier for Japanese", "url": "https://github.com/ku-nlp/ishi", "project_name": "ishi", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Information Retrieval", "Text Classification" ] }, { "description": "The official repository for \"UnihanLM: Coarse-to-Fine Chinese-Japanese Language Model Pretraining with the Unihan Database\", AACL-IJCNLP 2020", "url": "https://github.com/JetRunner/unihan-lm", "project_name": "unihan-lm", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Language Models", "Semantic Text Processing" ] }, { "description": "Aggregating Japanese words based on Juman++ and ConceptNet5.5", "url": "https://github.com/hkiyomaru/japanese-word-aggregation", "project_name": "japanese-word-aggregation", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Term Extraction", "Information Extraction & Text Mining" ] }, { "description": "[PyTorch] Show, Attend and Tell for Japanese", "url": "https://github.com/Japanese-Image-Captioning/SAT-for-Japanese", "project_name": "SAT-for-Japanese", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Natural Language Interfaces", "Visual Data in NLP", "Dialogue Systems & Conversational Agents", "Multimodality" ] }, { "description": "A sample Japanese question answering system using Haystack, Elasticsearch, and Japanese Wikipedia", "url": "https://github.com/Shingo-Kamata/japanese_qa_demo_with_haystack_and_es", "project_name": "japanese_qa_demo_with_haystack_and_es", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Retrieval", "Indexing" ] }, { "description": "A Japanese Parser (including historical Japanese)", "url": "https://github.com/komiya-lab/monaka", "project_name": "monaka", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Syntactic Parsing" ] }, { "description": "Streamlining Japanese-English
Translation with Advanced Preprocessing and Integrated Translation Technologies", "url": "https://github.com/bikatr7/kudasai", "project_name": "kudasai", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation" ] }, { "description": "A tool for visualizing MeCab morphological analysis results", "url": "https://github.com/sophiefy/mecab-visualizer", "project_name": "mecab-visualizer", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "An experiment with semantic structure search that absorbs variation in patient expressions, using Elasticsearch, GiNZA, and a patient expression dictionary", "url": "https://github.com/po3rin/symptom-expression-search", "project_name": "symptom-expression-search", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Retrieval" ] }, { "description": "Trimatch: An (Exact|Prefix|Approximate) String Matching Library", "url": "https://github.com/tuem/trimatch", "project_name": "trimatch", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Retrieval", "Indexing" ] }, { "description": "Finding all pairs of similar documents time- and memory-efficiently", "url": "https://github.com/legalforce-research/find-simdoc", "project_name": "find-simdoc", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Retrieval", "Green & Sustainable NLP" ] }, { "description": "Provides benchmark tools to compare the performance of data structures for string matching.", "url": "https://github.com/legalforce-research/stringmatch-bench", "project_name": "stringmatch-bench", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Information Retrieval", "Green & Sustainable NLP", "Indexing", "Annotation and Dataset Development" ] }, { "description": "Converts words in text to emoji", "url": "https://github.com/elzup/jptext-to-emoji", "project_name": "jptext-to-emoji", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Extend GNOME On-Screen Keyboard for Input Methods", "url": "https://github.com/esrille/oskim", "project_name": "oskim", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "Predict pitch accent in Japanese", "url": "https://github.com/shirakaba/pitch-accent", "project_name": "pitch-accent", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology",
"Syntactic Text Processing" ] }, { "description": "Yet Another Japanese-Wikipedia Entity Vectors", "url": "https://github.com/wikiwikification/jawikivec", "project_name": "jawikivec", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", "Knowledge Representation", "Semantic Text Processing" ] }, { "description": "We pretrained a BART-based Japanese masked language model on paper abstracts from the academic database CiNii Articles", "url": "https://github.com/EhimeNLP/AcademicBART", "project_name": "AcademicBART", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Representation Learning", "Language Models", "Semantic Text Processing" ] }, { "description": "電音IME: Microsoft IMEなどで利用することを想定した「電音部」関連用語の辞書", "url": "https://github.com/albno273/denonbu-ime-dic", "project_name": "denonbu-ime-dic", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "うちなーぐち辞典(沖縄語辞典)", "url": "https://github.com/nanjakkun/uchinaaguchi_dict", "project_name": "uchinaaguchi_dict", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Anthy maintenance", "url": "https://github.com/xorgy/anthy", "project_name": "anthy", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [] }, { "description": "古典日本語の分類語彙表データ", "url": "https://github.com/yocjyet/wlsp-classical", "project_name": "wlsp-classical", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "2023年1月にリニューアルしたNDL Ngram Viewerのソースコード等一式", "url": "https://github.com/ndl-lab/ndlngramviewer_v2", "project_name": "ndlngramviewer_v2", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [] }, { "description": "Japanese Livedoor news corpus for huggingface datasets", "url": "https://github.com/shunk031/huggingface-datasets_livedoor-news-corpus", "project_name": "huggingface-datasets_livedoor-news-corpus", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Dialogue Systems & Conversational Agents", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "jpn_explainable_qa_dataset", "url": "https://github.com/aiishii/jpn_explainable_qa_dataset", "project_name": "jpn_explainable_qa_dataset", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Responsible & Trustworthy 
NLP", "Natural Language Interfaces", "Question Answering", "Explainability & Interpretability in NLP", "Annotation and Dataset Development" ] }, { "description": "JEMHopQA (Japanese Explainable Multi-hop Question Answering) is a Japanese multi-hop QA dataset that can evaluate internal reasoning.", "url": "https://github.com/aiishii/jemhopqa", "project_name": "jemhopqa", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Responsible & Trustworthy NLP", "Natural Language Interfaces", "Question Answering", "Explainability & Interpretability in NLP" ] }, { "description": "常用漢字表他、漢字に関するデータ", "url": "https://github.com/mimneko/kanji-data", "project_name": "kanji-data", "stargazers_count": 2, "source": "GitHub", "score": -0.3120764676109988, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese text normalizer that resolves spelling inconsistencies. (日本語表記揺れ解消ツール)", "url": "https://github.com/sea-turt1e/yurenizer", "project_name": "yurenizer", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Normalization" ] }, { "description": "詳細なアノテーション基準に基づく症例報告コーパスからの固有表現及び関係の抽出精度の推論を行うコード", "url": "https://github.com/aih-uth/joint-information-extraction-hs", "project_name": "joint-information-extraction-hs", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Japanese Address Munger", "url": "https://github.com/alvations/yubin", "project_name": "yubin", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "A python library to convert Japanese to phoneme.", "url": "https://github.com/iory/japanese2phoneme", "project_name": "japanese2phoneme", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Syntactic Text Processing" ] }, { "description": "This repository contains codes related to the experiments in \"An Experimental Evaluation of Japanese Tokenizers for Sentiment-Based Text Classification\"", "url": "https://github.com/arusl/anlp_nlp2021_d3-1", "project_name": "anlp_nlp2021_d3-1", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Responsible & Trustworthy NLP", "Information Retrieval", "Text Classification", "Sentiment Analysis" ] }, { "description": "Japanese-Dialog-Transformerの応答候補に対して、KenLMによるN-gram言語モデルでスコアリングし、フィルタリング若しくはリランキングを行う。", "url": "https://github.com/TUT-SLP-lab/JDT-with-KenLM-scoring", "project_name": "JDT-with-KenLM-scoring", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], 
"model_or_dataset": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Systems & Conversational Agents", "Language Models", "Semantic Text Processing" ] }, { "description": "The evaluation scripts of JMTEB (Japanese Massive Text Embedding Benchmark)", "url": "https://github.com/sbintuitions/jmteb", "project_name": "jmteb", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Representation Learning", "Semantic Text Processing" ] }, { "description": "日本語データセットでのqlora instruction tuning学習サンプルコード", "url": "https://github.com/sosuke115/qlora_ja", "project_name": "qlora_ja", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Low-Resource NLP" ] }, { "description": "青空文庫全書籍のWord2Vecビルダー+構築済みモデル", "url": "https://github.com/eggplants/aovec", "project_name": "aovec", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "MT model trained using the friendly_JA Corpus attempting to make Japanese easier/more accessible to occidental people by using the Latin/English derived katakana lexicon instead of the standard Sino-Japanese lexicon", "url": "https://github.com/astremo/friendly_JA-Model", "project_name": "friendly_JA-Model", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation" ] }, { "description": "GUI for ChatGPT API For Japanese", "url": "https://github.com/gyokuro33/ChuanhuChatGPTJapanese", "project_name": "ChuanhuChatGPTJapanese", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Response Generation", "Dialogue Systems & Conversational Agents", "Language Models", "Semantic Text Processing" ] }, { "description": "This Chrome extension can translate selected Japanese text to Hiragana by using ChatGPT.", "url": "https://github.com/franzwong/chrome-ext-translate-to-hiragana-with-chatgpt", "project_name": "chrome-ext-translate-to-hiragana-with-chatgpt", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Natural Language Interfaces", "Dialogue Response Generation", "Dialogue Systems & Conversational Agents", "Text Generation", "Machine Translation", "Language Models" ] }, { "description": "ChatVRMはブラウザで簡単に3Dキャラクターと会話ができるデモアプリケーションです。", "url": "https://github.com/pixiv/chatvrm", "project_name": "chatvrm", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "日本語の読みから Emoji に変換するための SKK 辞書 😂", "url": "https://github.com/ymrl/skk-jisyo.emoji-ja", "project_name": 
"skk-jisyo.emoji-ja", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "難読漢字を学年別にまとめた辞書です。", "url": "https://github.com/marmooo/nandoku", "project_name": "nandoku", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "A FOSS Japanese IME for Android", "url": "https://github.com/nelsonapenn/japanese_android_ime", "project_name": "japanese_android_ime", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "EnJaEL: En-Ja Parallel Entity Linking Dataset (Version 1.0)", "url": "https://github.com/shigashiyama/en-ja-el", "project_name": "en-ja-el", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Knowledge Representation", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "車両不具合情報に関するデータセット", "url": "https://github.com/rindybell/kokkosho_data", "project_name": "kokkosho_data", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "ISBN-13における日本語での出版物 (978-4-XXXXXXXXX) に関するデータ等", "url": "https://github.com/uribo/isbn4groups", "project_name": "isbn4groups", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Repository for Japanese Document-level Relation Extraction Dataset (plan to be released in March).", "url": "https://github.com/youmima/jacred", "project_name": "jacred", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Term Extraction", "Information Extraction & Text Mining", "Relation Extraction" ] }, { "description": "Jamp: Controlled Japanese Temporal Inference Dataset for Evaluating Generalization Capacity of Language Models", "url": "https://github.com/tomo-vv/temporalNLI_dataset", "project_name": "temporalNLI_dataset", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Responsible & Trustworthy NLP", "Language Models", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "否定理解能力を評価するための日本語言語推論データセット JNLI-Neg の公開用リポジトリです。", "url": "https://github.com/asahi-y/jnli-neg", "project_name": "jnli-neg", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" 
] }, { "description": "NLP2025 のチュートリアル「地理情報と言語処理 実践入門」の資料とソースコード", "url": "https://github.com/yuiseki/nlp2025-tutorial-2", "project_name": "nlp2025-tutorial-2", "stargazers_count": 1, "source": "GitHub", "score": -0.31490865933261614, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Responsible & Trustworthy NLP" ] }, { "description": "数量表現や時間表現の抽出・正規化を行うNormalizeNumexpのPython実装", "url": "https://github.com/tkscode/pynormalizenumexp", "project_name": "pynormalizenumexp", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "OCR system for recognizing modern Japanese magazines", "url": "https://github.com/ducanh841988/Kindai-OCR", "project_name": "Kindai-OCR", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Multilinguality", "Visual Data in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "Japanese OCR with CenterNet", "url": "https://github.com/lithium0003/findtextcenternet", "project_name": "findtextcenternet", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "Training and evaluation scripts for JGLUE, a Japanese language understanding benchmark", "url": "https://github.com/nobu-g/JGLUE-benchmark", "project_name": "JGLUE-benchmark", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Language Models", "Semantic Text Processing", "Low-Resource NLP", "Explainability & Interpretability in NLP" ] }, { "description": "A framework for few-shot evaluation of autoregressive language models.", "url": "https://github.com/tdc-yamada-ya/lm-evaluation-harness-jp-stable", "project_name": "lm-evaluation-harness-jp-stable", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Language Models", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "Automatic Speech Recognition with deepspeech2 model in pytorch with support from Zakuro AI.", "url": "https://github.com/JeanMaximilienCadic/ASRDeepSpeech", "project_name": "ASRDeepSpeech", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Speech Recognition", "Text Generation", "Speech & Audio in NLP", "Multimodality" ] }, { "description": "部首文字正規化ライブラリ", "url": "https://github.com/yamamaya/radicalchar", "project_name": "radicalchar", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "Yet another Japanese IME for IBus/Linux", "url": 
"https://github.com/tokuhirom/akaza", "project_name": "akaza", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "A Japanese text frontend processing toolkit", "url": "https://github.com/faruzan0820/natsume", "project_name": "natsume", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [] }, { "description": "Open source RAG with Llama Index for Japanese LLM in low resource settting", "url": "https://github.com/AkimParis/RAG-Japanese", "project_name": "RAG-Japanese", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Text Generation", "Low-Resource NLP" ] }, { "description": "About", "url": "https://github.com/nobu-g/jglue-evaluation-scripts", "project_name": "jglue-evaluation-scripts", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "Wikipediaの日本語記事を元に、ユーザの質問に回答するGradioベースのRAGのサンプル", "url": "https://github.com/lawofcycles/wikipedia-japanese-open-rag", "project_name": "wikipedia-japanese-open-rag", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Information Extraction & Text Mining", "Coreference Resolution", "Named Entity Recognition", "Annotation and Dataset Development" ] }, { "description": "Open source RAG with Llama Index for Japanese LLM in low resource settting", "url": "https://github.com/akimfromparis/rag-japanese", "project_name": "rag-japanese", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Responsible & Trustworthy NLP", "Text Generation", "Low-Resource NLP" ] }, { "description": "連続部分文字列の単語判定を行います", "url": "https://github.com/toufu-24/substring-word-finder", "project_name": "substring-word-finder", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "a Frontier Japanese Speech Generation net", "url": "https://github.com/respaired/tsukasa-speech", "project_name": "tsukasa-speech", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Text Generation", "Speech & Audio in NLP", "Multimodality" ] }, { "description": "A morphological analysis library.", "url": "https://github.com/lindera-morphology/lindera", "project_name": "lindera", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Tagging", "Morphology" ] }, { "description": "Lindera tokenizer for 
Tantivy.", "url": "https://github.com/lindera-morphology/lindera-tantivy", "project_name": "lindera-tantivy", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Syntactic Text Processing", "Text Segmentation", "Tagging", "Morphology" ] }, { "description": "Official fine-tuning code for \"Emotion Analysis of Japanese Tweets and Comparison to Vaccinations in Japan\"", "url": "https://github.com/PatrickJohnRamos/BERT-Japan-vaccination", "project_name": "BERT-Japan-vaccination", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "multi_labels": [ "Language Models", "Emotion Analysis", "Semantic Text Processing", "Sentiment Analysis" ] }, { "description": "ChatGPTを使ってVRChat上でお喋り出来るようにするプログラム。", "url": "https://github.com/Yuchi-Games/VRChatGPT", "project_name": "VRChatGPT", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Response Generation", "Dialogue Systems & Conversational Agents", "Text Generation", "Language Models", "Semantic Text Processing" ] }, { "description": "ChatGPT の Prompt のサンプルです。", "url": "https://github.com/dahatake/chatgpt-prompt-sample-japanese", "project_name": "chatgpt-prompt-sample-japanese", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Response Generation", "Dialogue Systems & Conversational Agents", "Text Generation", "Language Models", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "ChatGPTやCopilotなど各種生成AI用の「日本語]の Prompt のサンプル", "url": "https://github.com/dahatake/generativeai-prompt-sample-japanese", "project_name": "generativeai-prompt-sample-japanese", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Natural Language Interfaces", "Dialogue Response Generation", "Dialogue Systems & Conversational Agents", "Text Generation", "Language Models", "Semantic Text Processing", "Low-Resource NLP" ] }, { "description": "Microsoft IMEなどで利用することを想定した、現状判明している全てのポケモンの名前を網羅した用語辞書です。", "url": "https://github.com/Umichang/pokemon-ime-dic", "project_name": "pokemon-ime-dic", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Convert data from Japanese dictionary websites and applications into portable file formats", "url": "https://github.com/stephenmk/jitenbot", "project_name": "jitenbot", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Morphology", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "JMdict, JMnedict, KANJIDIC for Yomitan/Yomichan.", "url": "https://github.com/themoeway/jmdict-yomitan", "project_name": "jmdict-yomitan", "stargazers_count": 0, 
"source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Annotation and Dataset Development", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "JLPT level tags for words in Yomichan", "url": "https://github.com/stephenmk/yomichan-jlpt-vocab", "project_name": "yomichan-jlpt-vocab", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Syntactic Text Processing", "Tagging", "Annotation and Dataset Development" ] }, { "description": "azooKey: A Japanese Keyboard iOS Application Fully Developed in Swift", "url": "https://github.com/ensan-hcl/azooKey", "project_name": "azooKey", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Visual Data in NLP", "Multimodality" ] }, { "description": "Japanese Input Method \"azooKey\" for Desktop, supporting macOS", "url": "https://github.com/ensan-hcl/azookey-desktop", "project_name": "azookey-desktop", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Kana-Kanji Conversion Module written in Swift", "url": "https://github.com/ensan-hcl/azookeykanakanjiconverter", "project_name": "azookeykanakanjiconverter", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Language Models", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "openjtalk形式のユーザー辞書", "url": "https://github.com/warihima/kanayomi-dict", "project_name": "kanayomi-dict", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Speech & Audio in NLP" ] }, { "description": "Yet Another macOS SKK Input Method", "url": "https://github.com/mtgto/macskk", "project_name": "macskk", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text Processing", "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "Parallel Universal Dependencies.", "url": "https://github.com/megagonlabs/UD_Japanese-PUD", "project_name": "UD_Japanese-PUD", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Syntactic Text Processing", "Syntactic Parsing", "Annotation and Dataset Development" ] }, { "description": "日本語Wikipediaで使用される頻出単語のリスト", "url": "https://github.com/maeda6uiui-backup/WikipediaWordFrequencyList", "project_name": "WikipediaWordFrequencyList", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" 
] }, { "description": "friendly_JA is a parallel Japanese-to-Japanese corpus aimed at making Japanese easier by using the Latin/English derived katakana lexicon instead of the standard Sino-Japanese lexicon", "url": "https://github.com/astremo/friendly_JA-Corpus", "project_name": "friendly_JA-Corpus", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Machine Translation", "Annotation and Dataset Development" ] }, { "description": "COPA Dataset in Japanese", "url": "https://github.com/nlp-titech/copa-japanese", "project_name": "copa-japanese", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [] }, { "description": "CAMERA (CyberAgent Multimodal Evaluation for Ad Text GeneRAtion) for huggingface datasets", "url": "https://github.com/shunk031/huggingface-datasets_CAMERA", "project_name": "huggingface-datasets_CAMERA", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Visual Data in NLP", "Text Generation", "Multimodality" ] }, { "description": "FactCheckSentenceNLIデータセット", "url": "https://github.com/nlp-waseda/FactCheckSentenceNLI-FCSNLI-", "project_name": "FactCheckSentenceNLI-FCSNLI-", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "EaST-MELD is an English-Japanese dataset for emotion-aware speech translation based on MELD.", "url": "https://github.com/ku-nlp/EaST-MELD", "project_name": "EaST-MELD", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Text Generation", "Machine Translation", "Speech & Audio in NLP", "Multimodality" ] }, { "description": "Construct large-scale Japanese audio corpus at home", "url": "https://github.com/reazon-research/reazonspeech", "project_name": "reazonspeech", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Speech & Audio in NLP", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "JADES is a dataset for text simplification in Japanese, described in \"JADES: New Text Simplification Dataset in Japanese Targeted at Non-Native Speakers\" (the paper will be available soon).", "url": "https://github.com/naist-nlp/jades", "project_name": "jades", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Paraphrasing", "Text Generation", "Annotation and Dataset Development" ] }, { "description": "Dataset of UniMorph in Japanese", "url": "https://github.com/cl-tohoku/j-unimorph", "project_name": "j-unimorph", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Syntactic Text 
Processing", "Morphology" ] }, { "description": "Dataset for the LREC-COLING 2024 paper \"A Gaze-grounded Visual Question Answering Dataset for Clarifying Ambiguous Japanese Questions\"", "url": "https://github.com/riken-grp/GazeVQA", "project_name": "GazeVQA", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Natural Language Interfaces", "Visual Data in NLP", "Question Answering", "Multimodality", "Annotation and Dataset Development" ] }, { "description": "JMED-LLM: Japanese Medical Evaluation Dataset for Large Language Models", "url": "https://github.com/sociocom/jmed-llm", "project_name": "jmed-llm", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Language Models", "Semantic Text Processing" ] }, { "description": "Plain text format for Japanese law", "url": "https://github.com/yamachig/lawtext", "project_name": "lawtext", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Linguistics & Cognitive NLP" ] }, { "description": "話題に基づく語義曖昧性解消評価セット", "url": "https://github.com/nut-jnlp/japanesetopicwsd", "project_name": "japanesetopicwsd", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Annotation and Dataset Development" ] }, { "description": "HDIC : Integrated Database of Hanzi Dictionaries in Early Japan", "url": "https://github.com/shikeda/hdic", "project_name": "hdic", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Vocabulary, Dictionary, and Language Input Method" ] }, { "description": "「源氏物語」形態論情報データ", "url": "https://github.com/togiso/openchj-genji", "project_name": "openchj-genji", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] }, { "description": "This repository contains data for our paper \"AdParaphrase: Paraphrase Dataset for Analyzing Linguistic Features toward Generating Attractive Ad Texts\".", "url": "https://github.com/CyberAgentAILab/AdParaphrase", "project_name": "AdParaphrase", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Paraphrasing", "Text Generation" ] }, { "description": "アスペクトを考慮した日本語時間推論データセットの構築(Jamp_sp: Controlled Japanese Temporal Inference Dataset Considering Aspect)", "url": "https://github.com/ynklab/Jamp_sp", "project_name": "Jamp_sp", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [] }, { "description": "MultiLS-Japanese Lexical Complexity Prediction and Lexical Simplification Dataset for Japanese: annotator profiles, unaggregated annotation, and annotatation guidelines.", "url": 
"https://github.com/naist-nlp/multils-japanese", "project_name": "multils-japanese", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Paraphrasing", "Text Generation", "Semantic Text Processing", "Annotation and Dataset Development" ] }, { "description": "NINJAL Web Japanese Corpus", "url": "https://github.com/masayu-a/nwjc", "project_name": "nwjc", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "multi_labels": [ "Multilinguality", "Annotation and Dataset Development" ] }, { "description": "環境構築手順とソースコード", "url": "https://github.com/hiroshi-matsuda-rit/nlp2024-tutorial-3", "project_name": "nlp2024-tutorial-3", "stargazers_count": 0, "source": "GitHub", "score": -0.31774085105423344, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null, "multi_labels": [ "Phonology", "Annotation and Dataset Development" ] } ]