zR committed
Commit 547dc25
Parent(s): 1b77b8d

fix convert_tokens_to_string

Files changed:
- README_en.md +0 -1
- tokenization_chatglm.py +4 -4
README_en.md CHANGED

@@ -135,7 +135,6 @@ sampling_params = SamplingParams(temperature=0.95, max_tokens=1024, stop_token_i
 
 inputs = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
 outputs = llm.generate(prompts=inputs, sampling_params=sampling_params)
-
 print(outputs[0].outputs[0].text)
 ```
 
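For context, the README hunk above sits inside the README's vLLM example. The sketch below reconstructs that snippet as a self-contained script; the model id, prompt contents, and omitted stop_token_ids are assumptions added for illustration, not taken from this commit.

# Hedged reconstruction of the surrounding README snippet.
# model_id and the prompt below are assumptions, not part of the diff.
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

model_id = "THUDM/glm-4-9b-chat"  # assumed repo id
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
llm = LLM(model=model_id, trust_remote_code=True)

prompt = [{"role": "user", "content": "Hello"}]
sampling_params = SamplingParams(temperature=0.95, max_tokens=1024)

inputs = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
outputs = llm.generate(prompts=inputs, sampling_params=sampling_params)
print(outputs[0].outputs[0].text)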
tokenization_chatglm.py CHANGED

@@ -63,22 +63,22 @@ class ChatGLM4Tokenizer(PreTrainedTokenizer):
         vocab.update(self.added_tokens_encoder)
         return vocab
 
-    def convert_tokens_to_string(self, tokens: List[Union[bytes, str]]) -> str:
+    def convert_tokens_to_string(self, tokens: List[Union[bytes, str, int]]) -> str:
         """
         Converts a sequence of tokens in a single string.
         """
         text = ""
         temp = b""
         for t in tokens:
+            if isinstance(t, int):
+                t = chr(t)
             if isinstance(t, str):
                 if temp:
                     text += temp.decode("utf-8", errors="replace")
-                    temp = b""
-                text += t
             elif isinstance(t, bytes):
                 temp += t
             else:
-                raise TypeError("token should only be of type
+                raise TypeError("token should only be of type int, bytes or str")
         if temp:
             text += temp.decode("utf-8", errors="replace")
         return text
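To illustrate what the widened List[Union[bytes, str, int]] signature and the added chr(t) branch are for, here is a small standalone sketch of the same str/bytes/int joining pattern. It is a hedged demo with an invented function name and sample tokens, and it keeps the byte-buffer reset and string append for self-containment, so it is not a verbatim copy of the repo's patched method.

from typing import List, Union

def join_mixed_tokens(tokens: List[Union[bytes, str, int]]) -> str:
    # Illustrative helper (not from the repo): ints are mapped to single
    # characters via chr(), raw bytes are buffered and decoded as UTF-8,
    # and plain strings flush the byte buffer before being appended.
    text = ""
    temp = b""
    for t in tokens:
        if isinstance(t, int):
            t = chr(t)
        if isinstance(t, str):
            if temp:
                text += temp.decode("utf-8", errors="replace")
                temp = b""
            text += t
        elif isinstance(t, bytes):
            temp += t
        else:
            raise TypeError("token should only be of type int, bytes or str")
    if temp:
        text += temp.decode("utf-8", errors="replace")
    return text

print(join_mixed_tokens(["Hi", b" \xe4\xbd\xa0\xe5\xa5\xbd", 33]))  # prints "Hi 你好!"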