Commit 196a035
Parent(s): 7af717c
Upload modeling_baichuan.py

- modeling_baichuan.py +13 -13

modeling_baichuan.py CHANGED
@@ -552,41 +552,41 @@ class BaichuanForCausalLM(BaichuanPreTrainedModel):
         )
         return self
 
-    def _build_chat_input(self, tokenizer, messages: List[dict], max_new_tokens: int=0):
+    def _build_chat_input(self, tokenizer, messages: List[dict], max_new_tokens: int=0, system_prompt=""):
         max_new_tokens = max_new_tokens or self.generation_config.max_new_tokens
         max_input_tokens = self.config.model_max_length - max_new_tokens
         max_input_tokens = max(self.config.model_max_length // 2, max_input_tokens)
         total_input, round_input = [], []
-        for i, message in enumerate(messages[::-1]):
-            content_tokens = tokenizer.encode(message['content'])
+        for i, message in enumerate(messages):
             if message['role'] == 'user':
-                round_input = [self.generation_config.user_token_id] + content_tokens + round_input
+                if i == 0:
+                    content_tokens = tokenizer.encode(system_prompt + "USER: " + message['content'] + " ASSISTANT: ")
+                else:
+                    content_tokens = tokenizer.encode("USER: " + message['content'] + " ASSISTANT: ")
+                round_input += content_tokens
                 if total_input and len(total_input) + len(round_input) > max_input_tokens:
                     break
                 else:
-                    total_input = round_input + total_input
+                    total_input += round_input
                     if len(total_input) >= max_input_tokens:
                         break
                     else:
                         round_input = []
             elif message['role'] == 'assistant':
-                round_input = [
-                    self.generation_config.assistant_token_id
-                ] + content_tokens + [
-                    self.generation_config.eos_token_id
-                ] + round_input
+                content_tokens = tokenizer.encode(message['content'])
+                round_input += content_tokens + [self.generation_config.eos_token_id]
             else:
                 raise ValueError(f"message role not supported yet: {message['role']}")
         total_input = total_input[-max_input_tokens:]  # truncate left
-        total_input.append(self.generation_config.assistant_token_id)
+        # total_input.append(self.generation_config.eos_token_id)
         total_input = torch.LongTensor([total_input]).to(self.device)
         return total_input
 
     @torch.no_grad()
-    def chat(self, tokenizer, messages: List[dict], stream=False,
+    def chat(self, tokenizer, messages: List[dict], stream=False, system_prompt="",
              generation_config: Optional[GenerationConfig]=None):
         generation_config = generation_config or self.generation_config
-        input_ids = self._build_chat_input(tokenizer, messages, generation_config.max_new_tokens)
+        input_ids = self._build_chat_input(tokenizer, messages, generation_config.max_new_tokens, system_prompt)
         if stream:
             from transformers_stream_generator.main import NewGenerationMixin, StreamGenerationConfig
             self.__class__.generate = NewGenerationMixin.generate
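In short, this commit swaps the upstream Baichuan chat format, which wrapped each round in special user_token_id / assistant_token_id markers, for a Vicuna-style plain-text prompt ("USER: ... ASSISTANT: "). It threads a new system_prompt argument from chat() into _build_chat_input(), prepends it only to the first user turn, and appends eos_token_id after each assistant reply instead of once at the end (the final append is commented out, since the prompt now ends with the " ASSISTANT: " cue for the model's next reply).

A minimal usage sketch of the new signature; the repo id and messages are placeholders, and it assumes chat() still returns the decoded response on the non-streaming path, as in the upstream Baichuan code:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

repo = "your-namespace/your-baichuan-fork"  # placeholder repo id

tokenizer = AutoTokenizer.from_pretrained(repo, use_fast=False, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    repo, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True
)
model.generation_config = GenerationConfig.from_pretrained(repo)

messages = [
    {"role": "user", "content": "What is the capital of France?"},
    {"role": "assistant", "content": "The capital of France is Paris."},
    {"role": "user", "content": "And of Germany?"},
]

# system_prompt is encoded only into the first user turn (the i == 0 branch),
# so _build_chat_input assembles, before tokenization:
#   system_prompt + "USER: " + q1 + " ASSISTANT: " + a1 + <eos>
#                 + "USER: " + q2 + " ASSISTANT: "
response = model.chat(tokenizer, messages,
                      system_prompt="You are a helpful assistant. ")
print(response)

Note that total_input[-max_input_tokens:] still truncates from the left, so the oldest tokens, including the system prompt, are dropped first once the history exceeds the input budget.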