Update processing_qwen2_ts.py to work with the latest vllm patch for ChatTS support. (#16)
Commit: d9c80001adf853e6d9c275b6e981b8d352ee0e5f
Co-authored-by: Alexander Chemeris <[email protected]>
- processing_qwen2_ts.py +55 -36
processing_qwen2_ts.py
CHANGED
@@ -91,45 +91,62 @@ class Qwen2TSProcessor(ProcessorMixin):
         if timeseries is None:
             timeseries = []
 
-        encoded_ts_arrays = []
         reconstructed_prompts = []
-        total_ts_cnt = 0
-        for idx, prompt in enumerate(text):
-            # Split prompt by <ts><ts/> placeholders
-            last_ts_cnt = total_ts_cnt
-            prompt_segments = prompt.split("<ts><ts/>")
-            total_ts_cnt = total_ts_cnt + len(prompt_segments) - 1
-
-            # Encode each time series and rebuild the prompt
-            reconstructed_prompt = prompt_segments[0]
-
-            for i, ts in enumerate(timeseries[last_ts_cnt:total_ts_cnt]):
-                encoded_ts, ts_prompt, _ = sp_encoding(ts, eots_token=not vllm_flag)
-                reconstructed_prompt += ts_prompt + prompt_segments[i + 1]
-                # Ensure time series shape [1, seq_len, feature_dim] for batch concatenation
-                encoded_ts_arrays.append(encoded_ts[None, ...])
+        concatenated_ts = None
+        ts_tokens = []
 
-            reconstructed_prompts.append(reconstructed_prompt)
-
-        if len(timeseries) != len(encoded_ts_arrays):
-            raise ValueError(
-                f"Mismatch between <ts><ts/> placeholders ({total_ts_cnt}) "
-                f"and time series ({len(encoded_ts_arrays)})."
-            )
-
-        if len(encoded_ts_arrays) > 0:
-            # Pad time series to the same length
-            max_length = max(ts.shape[1] for ts in encoded_ts_arrays)
-            padded_ts_arrays = [
-                np.pad(ts, ((0, 0), (0, max_length - ts.shape[1]), (0, 0)), mode="constant", constant_values=0.0)
-                for ts in encoded_ts_arrays
-            ]
-            concatenated_ts = np.concatenate(padded_ts_arrays, axis=0)  # Shape: [batch_size, max_length, feature_dim]
+        if vllm_flag:
+            # All prompt modifications have to be done inside of the vLLM
+            # to work correctly with its caching mechanism.
+            reconstructed_prompts = text
 
-            # Convert to torch
-            concatenated_ts = torch.from_numpy(concatenated_ts).half()
+            # Process timeseries data
+            encoded_ts_arrays = []
+            for ts in timeseries:
+                # Get the normalized data and prompt text
+                encoded_ts, ts_prompt, _ = sp_encoding(ts, eots_token=False)
+                # Tokenize the ts_prompt and add to the tokens list
+                if self.tokenizer is not None:
+                    tokens = self.tokenizer.encode(ts_prompt, add_special_tokens=False)
+                    ts_tokens.append(tokens)
+                encoded_ts_arrays.append(encoded_ts[None, ...])
         else:
-            concatenated_ts = None
+            encoded_ts_arrays = []
+            total_ts_cnt = 0
+            for idx, prompt in enumerate(text):
+                # Split prompt by <ts><ts/> placeholders
+                last_ts_cnt = total_ts_cnt
+                prompt_segments = prompt.split("<ts><ts/>")
+                total_ts_cnt = total_ts_cnt + len(prompt_segments) - 1
+
+                # Encode each time series and rebuild the prompt
+                reconstructed_prompt = prompt_segments[0]
+
+                for i, ts in enumerate(timeseries[last_ts_cnt:total_ts_cnt]):
+                    encoded_ts, ts_prompt, _ = sp_encoding(ts, eots_token=not vllm_flag)
+                    reconstructed_prompt += ts_prompt + prompt_segments[i + 1]
+                    # Ensure time series shape [1, seq_len, feature_dim] for batch concatenation
+                    encoded_ts_arrays.append(encoded_ts[None, ...])
+
+                reconstructed_prompts.append(reconstructed_prompt)
+
+            if len(timeseries) != len(encoded_ts_arrays):
+                raise ValueError(
+                    f"Mismatch between <ts><ts/> placeholders ({total_ts_cnt}) "
+                    f"and time series ({len(encoded_ts_arrays)})."
+                )
+
+            if len(encoded_ts_arrays) > 0:
+                # Pad time series to the same length
+                max_length = max(ts.shape[1] for ts in encoded_ts_arrays)
+                padded_ts_arrays = [
+                    np.pad(ts, ((0, 0), (0, max_length - ts.shape[1]), (0, 0)), mode="constant", constant_values=0.0)
+                    for ts in encoded_ts_arrays
+                ]
+                concatenated_ts = np.concatenate(padded_ts_arrays, axis=0)  # Shape: [batch_size, max_length, feature_dim]
+
+                # Convert to torch
+                concatenated_ts = torch.from_numpy(concatenated_ts).half()
 
         # Tokenize the processed prompt
         tokenizer_outputs = {}
@@ -138,7 +155,9 @@ class Qwen2TSProcessor(ProcessorMixin):
 
         # Create the final output
         outputs = tokenizer_outputs
-        if concatenated_ts is not None:
+        if vllm_flag:
+            outputs["timeseries"] = zip(ts_tokens, encoded_ts_arrays)
+        elif concatenated_ts is not None:
            outputs["timeseries"] = concatenated_ts
 
         return BatchFeature(data=outputs)
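For reference, here is a minimal standalone sketch (not part of the commit) of what the non-vLLM branch above does with the encoded series: pad every series along the sequence axis to the longest length, stack them into one batch, and convert to half-precision torch. The array shapes are made up for illustration; only numpy and torch are assumed.

# Illustrative only: mirrors the padding/stacking logic of the non-vLLM branch.
import numpy as np
import torch

# Two dummy "encoded" series of different lengths, each shaped [1, seq_len, feature_dim]
encoded_ts_arrays = [np.ones((1, 5, 3)), np.ones((1, 8, 3))]

# Pad along the sequence axis to the longest length, then stack into one batch
max_length = max(ts.shape[1] for ts in encoded_ts_arrays)
padded_ts_arrays = [
    np.pad(ts, ((0, 0), (0, max_length - ts.shape[1]), (0, 0)), mode="constant", constant_values=0.0)
    for ts in encoded_ts_arrays
]
concatenated_ts = torch.from_numpy(np.concatenate(padded_ts_arrays, axis=0)).half()
print(concatenated_ts.shape)  # torch.Size([2, 8, 3])

In the vLLM path, by contrast, the processor leaves the prompt text untouched and sets outputs["timeseries"] to zip(ts_tokens, encoded_ts_arrays), pairing each time series' token ids with its encoded array so that the vLLM-side ChatTS patch can perform the placeholder expansion itself, as required by its prompt-caching mechanism.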