minpeter committed
Commit 5180f51 · verified · 1 Parent(s): b271ce3

fix bos, eos token id

config.json CHANGED
@@ -4,9 +4,9 @@
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
-  "bos_token_id": 100257,
+  "bos_token_id": 100272,
   "end_token_id": 100257,
-  "eos_token_id": 100257,
+  "eos_token_id": 100273,
   "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 1024,
@@ -29,4 +29,4 @@
   "transformers_version": "4.50.3",
   "use_cache": false,
   "vocab_size": 110592
-}
+}
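
A quick way to confirm the ids written above, as a minimal sketch; it assumes the repository has been downloaded locally, and the path below is a placeholder, not part of this commit:

from transformers import AutoConfig

config = AutoConfig.from_pretrained("path/to/this/repo")  # placeholder path, assumed local checkout
print(config.bos_token_id)  # 100272 after this commit
print(config.eos_token_id)  # 100273 after this commit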
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:decbc37141e9bc7b88f38395070c5261d9b68029973dfd3f3b63380c6165a8f4
+size 1132633202
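
The added file is only a Git LFS pointer; the actual weights resolve through LFS. A minimal sketch for checking a downloaded pytorch_model.bin against the oid and size in the pointer above (the local path is an assumption):

import hashlib
import os

path = "pytorch_model.bin"  # assumed local download location
digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

print(digest.hexdigest() == "decbc37141e9bc7b88f38395070c5261d9b68029973dfd3f3b63380c6165a8f4")
print(os.path.getsize(path) == 1132633202)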
special_tokens_map.json CHANGED
@@ -56,14 +56,14 @@
     "<PASSWORD>"
   ],
   "bos_token": {
-    "content": "<|endoftext|>",
+    "content": "<|im_start|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
-    "content": "<|endofturn|>",
+    "content": "<|im_end|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
@@ -83,4 +83,4 @@
     "rstrip": false,
     "single_word": false
   }
-}
+}
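
A minimal sketch, assuming the tokenizer is loaded from this repo (placeholder path): the new bos/eos token contents should resolve to the ids written to config.json in this same commit.

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("path/to/this/repo")  # placeholder path
print(tok.bos_token, tok.convert_tokens_to_ids("<|im_start|>"))  # expected 100272 per config.json
print(tok.eos_token, tok.convert_tokens_to_ids("<|im_end|>"))    # expected 100273 per config.json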
tokenizer_config.json CHANGED
@@ -490,13 +490,13 @@
     "<KEY>",
     "<PASSWORD>"
   ],
-  "bos_token": "<|endoftext|>",
+  "bos_token": "<|im_start|>",
   "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": true,
-  "eos_token": "<|endofturn|>",
+  "eos_token": "<|im_end|>",
   "extra_special_tokens": {},
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<|endoftext|>",
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>"
-}
+}
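
The chat_template itself is unchanged; only bos_token and eos_token now match the <|im_start|>/<|im_end|> markers it emits, so generation can stop at the end of a turn. A minimal sketch of what the template renders (placeholder repo path, illustrative message):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("path/to/this/repo")  # placeholder path
messages = [{"role": "user", "content": "Hello"}]  # illustrative example
print(tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))
# <|im_start|>user
# Hello<|im_end|>
# <|im_start|>assistant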