Upload tokenizer

Browse files

Files changed (3) hide show

README.md +4 -4
special_tokens_map.json +67 -10
tokenizer_config.json +8 -2

README.md CHANGED Viewed

@@ -5,14 +5,14 @@ license_link: https://huggingface.co/Qwen/Qwen2.5-72B-Instruct/blob/main/LICENSE
 pipeline_tag: image-text-to-text
 library_name: transformers
 base_model:
-  - OpenGVLab/InternVL3-2B-Instruct
 base_model_relation: finetune
 datasets:
-  - OpenGVLab/MMPR-v1.2
 language:
-  - multilingual
 tags:
-  - internvl
 ---
 # InternVL3-2B Transformers 🤗 Implementation

 pipeline_tag: image-text-to-text
 library_name: transformers
 base_model:
+- OpenGVLab/InternVL3-2B-Instruct
 base_model_relation: finetune
 datasets:
+- OpenGVLab/MMPR-v1.2
 language:
+- multilingual
 tags:
+- internvl
 ---
 # InternVL3-2B Transformers 🤗 Implementation

special_tokens_map.json CHANGED Viewed

@@ -13,16 +13,72 @@
     "<|vision_pad|>",
     "<|image_pad|>",
     "<|video_pad|>",
-    "<img>",
-    "</img>",
-    "<IMG_CONTEXT>",
-    "<quad>",
-    "</quad>",
-    "<ref>",
-    "</ref>",
-    "<box>",
-    "</box>"
   ],
   "eos_token": {
     "content": "<|im_end|>",
     "lstrip": false,
@@ -36,5 +92,6 @@
     "normalized": false,
     "rstrip": false,
     "single_word": false
-  }
 }

     "<|vision_pad|>",
     "<|image_pad|>",
     "<|video_pad|>",
+    {
+      "content": "<img>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "</img>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<IMG_CONTEXT>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<quad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "</quad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<ref>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "</ref>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<box>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "</box>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
   ],
+  "context_image_token": "<IMG_CONTEXT>",
+  "end_image_token": "</img>",
   "eos_token": {
     "content": "<|im_end|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
+  },
+  "start_image_token": "<img>"
 }

tokenizer_config.json CHANGED Viewed

@@ -277,14 +277,20 @@
   ],
   "bos_token": null,
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
   "errors": "replace",
-  "extra_special_tokens": {},
   "model_max_length": 8192,
   "pad_token": "<|endoftext|>",
-  "processor_class": "InternVLProcessor",
   "return_token_type_ids": false,
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",
   "unk_token": null
 }

   ],
   "bos_token": null,
   "clean_up_tokenization_spaces": false,
+  "context_image_token": "<IMG_CONTEXT>",
+  "end_image_token": "</img>",
   "eos_token": "<|im_end|>",
   "errors": "replace",
+  "extra_special_tokens": {
+    "context_image_token": "<IMG_CONTEXT>",
+    "end_image_token": "</img>",
+    "start_image_token": "<img>"
+  },
   "model_max_length": 8192,
   "pad_token": "<|endoftext|>",
   "return_token_type_ids": false,
   "split_special_tokens": false,
+  "start_image_token": "<img>",
   "tokenizer_class": "Qwen2Tokenizer",
   "unk_token": null
 }