chengzl18 committed on
Commit
65f4c65
·
1 Parent(s): df4d1ef

Upload with huggingface_hub

Browse files
Files changed (1) hide show
  1. cctokenizer.py +1 -3
cctokenizer.py CHANGED
@@ -251,7 +251,6 @@ class ChineseCharTokenizer(BertTokenizer):
251
  vocab_files_names = {"vocab_file": "vocab.txt", 'mapping_file': "replace.json"}
252
 
253
  def __init__(self, vocab_file, *args, **kwargs):
254
- print(vocab_file)
255
  super(ChineseCharTokenizer, self).__init__(vocab_file, *args, **kwargs)
256
  self.unicoder_ranges = get_unicode_ranges()
257
  self.enclosed_tokens = {token for token in self.vocab if token[0] == '[' and token[-1] == ']' and 'unused' not in token}
@@ -260,8 +259,7 @@ class ChineseCharTokenizer(BertTokenizer):
260
  [token for token in self.enclosed_tokens if len(token) == 6],
261
  [token for token in self.enclosed_tokens if len(token) == 7]
262
  ]
263
- print(vocab_file)
264
- self.replace_map = load_json(os.path.join(self.name_or_path, 'replace.json'))
265
 
266
  # # [EOS]相当于逗号、换行,不用看作special token
267
  def convert_token_to_representative(self, token: str) -> str:
 
251
  vocab_files_names = {"vocab_file": "vocab.txt", 'mapping_file': "replace.json"}
252
 
253
  def __init__(self, vocab_file, *args, **kwargs):
 
254
  super(ChineseCharTokenizer, self).__init__(vocab_file, *args, **kwargs)
255
  self.unicoder_ranges = get_unicode_ranges()
256
  self.enclosed_tokens = {token for token in self.vocab if token[0] == '[' and token[-1] == ']' and 'unused' not in token}
 
259
  [token for token in self.enclosed_tokens if len(token) == 6],
260
  [token for token in self.enclosed_tokens if len(token) == 7]
261
  ]
262
+ self.replace_map = load_json(os.path.join(os.path.dirname(vocab_file), 'replace.json'))
 
263
 
264
  # # [EOS]相当于逗号、换行,不用看作special token
265
  def convert_token_to_representative(self, token: str) -> str: