chengzl18 committed on
Commit
def1786
•
1 Parent(s): c786dd0

Upload with huggingface_hub

Browse files
Files changed (2) hide show
  1. pytorch_model.bin +1 -1
  2. tokenizer_config.json +8 -2
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0d0cb6b55adc4722fed62a367f3e54e0d510ee90926fab45f01540bb5c32d1c
3
  size 382042873
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:914b38d56f0084daeb0388f7a3282b780f9fd199162188471cad63952678f77f
3
  size 382042873
tokenizer_config.json CHANGED
@@ -4,13 +4,19 @@
4
  "do_lower_case": true,
5
  "mask_token": "[MASK]",
6
  "model_max_length": 1000000000000000019884624838656,
7
- "name_or_path": "output/_extracted_model/bert-base-chinese-extracted",
8
  "never_split": null,
9
  "pad_token": "[PAD]",
10
  "sep_token": "[SEP]",
11
  "special_tokens_map_file": null,
12
  "strip_accents": null,
13
  "tokenize_chinese_chars": true,
14
- "tokenizer_class": "BertTokenizer",
 
 
 
 
 
 
15
  "unk_token": "[UNK]"
16
  }
 
4
  "do_lower_case": true,
5
  "mask_token": "[MASK]",
6
  "model_max_length": 1000000000000000019884624838656,
7
+ "name_or_path": "bert/tokenizer/bert-base-chinese-char-cm",
8
  "never_split": null,
9
  "pad_token": "[PAD]",
10
  "sep_token": "[SEP]",
11
  "special_tokens_map_file": null,
12
  "strip_accents": null,
13
  "tokenize_chinese_chars": true,
14
+ "tokenizer_class": "ChineseCharTokenizer",
15
+ "auto_map": {
16
+ "AutoTokenizer": [
17
+ "cctokenizer.ChineseCharTokenizer",
18
+ null
19
+ ]
20
+ },
21
  "unk_token": "[UNK]"
22
  }