Natsha committed on
Commit 765b3de · 1 Parent(s): 74bcef0

Changed the way the world transforms are stored: they are now stored in the order (n_frames, n_features, n_points).


This format can easily be loaded into the PointNet model with batch shape (batch_size, n_features, n_points).
Updated merge_labeled_and_unlabeled_data in utils.py to reflect this.
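
Editor's note: for context, a minimal sketch of how the new (n_frames, n_features, n_points) layout feeds a PointNet-style network. The file name, dataset key, and the Conv1d stand-in below are illustrative assumptions, not part of this commit:

    import h5py
    import numpy as np
    import torch

    # Hypothetical HDF5 file/key; the real export is produced by fbx_handler.py.
    with h5py.File('train_data.h5', 'r') as f:
        clouds = np.asarray(f['train'])              # (n_frames, n_features, n_points)

    batch = torch.from_numpy(clouds[:8]).float()     # (batch_size, n_features, n_points)
    # PointNet-style layers consume this directly, e.g. a 1x1 Conv1d over the points axis:
    conv = torch.nn.Conv1d(in_channels=batch.shape[1], out_channels=64, kernel_size=1)
    features = conv(batch)                           # (batch_size, 64, n_points)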

Files changed (2)
  1. fbx_handler.py +92 -74
  2. utils.py +15 -7
fbx_handler.py CHANGED
@@ -27,7 +27,6 @@ def center_axis(a: Union[List[float], np.array]) -> np.array:
     _min = np.min(a)
     _max = np.max(a)
     _c = _max - (_max - _min) * 0.5
-    # Center the array by subtracting the centroid.
     a -= _c
     return a
 
@@ -379,6 +378,35 @@ def get_children_of_parent(parent: fbx.FbxNode) -> List[fbx.FbxNode]:
     return [parent.GetChild(i) for i in range(parent.GetChildCount())]


+ def flatten_labeled_transforms(arr: np.array) -> np.array:
+     """
+     Flattens the given array so that it has the shape (n_actors * n_frames, 15, 73).
+     :param arr: `np.array` to process.
+     :return: `np.array` of shape (n_actors * n_frames, 15, 73).
+     """
+     # Transpose the array, so we get this order: (n_actors, n_frames, 15, 73).
+     # That way, we can stack the actors after each other instead of the frames
+     # (which would happen with the previous order).
+     flattened = arr.transpose(1, 0, 2, 3)
+     # Flatten the array, so we get a list of frames with all actors stacked after each other.
+     # Reshape to (n_actors * n_frames, 15, 73).
+     return np.concatenate(flattened, axis=0)
+
+
+ def replace_zeros_with_inf(arr: np.array) -> np.array:
+     """
+     Replaces all transform values for each marker on each frame that was not keyed.
+     :param arr: `np.array` to process.
+     :return: `np.array` with updated values.
+     """
+     # Find all labeled markers that have their keyed value set to 0 (which means they had no keyframe on tx),
+     # and set their transforms to np.inf.
+     mask = arr[:, -1] == 0
+     for i in range(arr.shape[0]):
+         arr[i, 2:, mask[i]] = np.inf
+     return arr
+
+
 class FBXContainerBase:
     def __init__(self, fbx_file: Path, debug: int = -1) -> None:
        """
@@ -693,7 +721,7 @@ class FBXContainer(FBXContainerBase):
        :param r: Custom frame range to extract.
        """
        self.init_labeled_world_transforms(r=r, incl_keyed=1)
-       self.init_unlabeled_world_transforms(r=r)
+       self.init_unlabeled_world_transforms(r=r, incl_keyed=1)

    def init_labeled_world_transforms(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None,
                                      incl_keyed: int = 1) -> np.array:
@@ -702,7 +730,7 @@ class FBXContainer(FBXContainerBase):
        in r. This can later be used to recreate the world transform matrix.
        :param r: Custom frame range to use.
        :param incl_keyed: `bool` whether to check if the marker was keyed at the frame.
-       :return: `np.array` of shape (n_frames, n_markers, 14).
+       :return: `np.array` of shape (n_frames, 15, n_markers).
        """
        r = self.convert_r(r)
        labeled_data = []
@@ -725,21 +753,20 @@ class FBXContainer(FBXContainerBase):
            labeled_data.append(actor_data)

        # Convert the list to a np array. This will have all frames at the last dimension because of this order:
-       # Shape (n_actors, n_markers, 14/15, n_frames).
+       # Shape (n_actors, n_markers, 15, n_frames).
        wide_layout = np.array(labeled_data)
-       # Transpose the array so that the first dimension is the frames.
-       # Shape (n_frames, n_actors, n_markers, 14).
-       self.labeled_world_transforms = np.transpose(wide_layout, axes=(3, 0, 1, 2))
+       # Transpose the array so that the order becomes (n_frames, n_actors, 15, n_markers).
+       self.labeled_world_transforms = np.transpose(wide_layout, axes=(3, 0, 2, 1))
        return self.labeled_world_transforms

    def init_unlabeled_world_transforms(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None,
-                                       incl_keyed: int = 0) -> np.array:
+                                       incl_keyed: int = 1) -> np.array:
        """
        For all unlabeled markers, stores a list for each element in the world transform for each frame
        in r. This can later be used to recreate the world transform matrix.
        :param r: Custom frame range to use.
        :param incl_keyed: `bool` whether to check if the marker was keyed at the frame.
-       :return: `np.array` of shape (n_frames, n_unlabeled_markers, 14).
+       :return: `np.array` of shape (n_frames, 15, n_unlabeled_markers).
        """
        r = self.convert_r(r)
        unlabeled_data = []
@@ -756,12 +783,10 @@ class FBXContainer(FBXContainerBase):
            self._print(f'Unlabeled marker {ulm.GetName()} done', 1)

        # Convert the list to a np array. This will have all frames at the last dimension because of this order:
-       # Shape (n_unlabeled_markers, 14/15, n_frames).
+       # Shape (n_unlabeled_markers, 15, n_frames).
        wide_layout = np.array(unlabeled_data)
-       # Transpose the array so that the first dimension is the frames.
-       # Shape (n_frames, n_unlabeled_markers, 14).
-       self.unlabeled_world_transforms = np.transpose(wide_layout, axes=(2, 0, 1))
-       # Returns shape (n_frames, n_unlabeled_markers, 14).
+       # Transpose the array so that the order becomes (n_frames, 15, n_unlabeled_markers).
+       self.unlabeled_world_transforms = np.transpose(wide_layout, axes=(2, 1, 0))
        return self.unlabeled_world_transforms

    def init(self) -> None:
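
Editor's note: the transpose above is what produces the feature-major layout described in the commit message. A standalone illustration with dummy shapes (not code from this commit):

    import numpy as np

    n_markers, n_features, n_frames = 5, 15, 3
    wide_layout = np.random.rand(n_markers, n_features, n_frames)
    per_frame = np.transpose(wide_layout, axes=(2, 1, 0))
    print(per_frame.shape)       # (3, 15, 5) == (n_frames, n_features, n_markers)
    tx_frame0 = per_frame[0, 2]  # tx values of all 5 markers at frame 0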
@@ -789,15 +814,15 @@ class FBXContainer(FBXContainerBase):
    def remove_clipping_poses(self, arr: np.array) -> np.array:
        """
        Checks for each axis if it does not cross the volume limits. Returns an array without clipping poses.
+       This function uses the volume dimensions in centimeters, so use it before the data is scaled down.
        :param arr: `np.array` to filter.
        :return: Filtered `np.array` that only has non-clipping poses.
        """
-       mask_x1 = (arr[:, :, 2] < self.hvol_x / self.scale).all(axis=1)
-       mask_x2 = (arr[:, :, 2] > -self.hvol_x / self.scale).all(axis=1)
-       mask_z1 = (arr[:, :, 4] < self.hvol_z / self.scale).all(axis=1)
-       mask_z2 = (arr[:, :, 4] > -self.hvol_z / self.scale).all(axis=1)
+       mask_x1 = (arr[:, 2] < self.hvol_x / self.scale).all(axis=1)
+       mask_x2 = (arr[:, 2] > -self.hvol_x / self.scale).all(axis=1)
+       mask_z1 = (arr[:, 4] < self.hvol_z / self.scale).all(axis=1)
+       mask_z2 = (arr[:, 4] > -self.hvol_z / self.scale).all(axis=1)
        mask = mask_x1 & mask_x2 & mask_z1 & mask_z2
-       # print(mask.shape, mask)
        return arr[mask]

    def extract_training_translations(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None) -> np.array:
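
Editor's note: the updated masks follow the new layout, where arr[:, 2] has shape (n_poses, n_markers), so .all(axis=1) keeps only poses where every marker stays inside the volume. The same pattern in isolation (made-up limits, not code from this commit):

    import numpy as np

    poses = np.random.uniform(-300.0, 300.0, size=(10, 14, 73))  # (n_poses, n_features, n_markers)
    half_width = 250.0
    inside = (np.abs(poses[:, 2]) < half_width).all(axis=1)  # True where every marker's tx fits
    poses = poses[inside]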
@@ -811,32 +836,28 @@ class FBXContainer(FBXContainerBase):
        if self.labeled_world_transforms is None:
            self.init_labeled_world_transforms(r=r, incl_keyed=1)

-       # Returns (n_frames, n_actors, 73, 15).
-       l_shape = self.labeled_world_transforms.shape
-
-       # Flatten the array, so we get a list of frames.
-       # Reshape to (n_frames * n_actors, 73, 15).
-       flattened = self.labeled_world_transforms.reshape(-1, l_shape[2], l_shape[3])
-
-       # Isolates the poses with all keyframes present by checking the last elements.
+       flattened = flatten_labeled_transforms(self.labeled_world_transforms)
+       # Isolate the poses with all keyframes present by checking the last elements.
        # Start with the mask.
-       # Returns shape of (n_frames * n_actors, 73, 15).
-       mask = (flattened[..., -1] == 1)
+       # Returns shape of (n_frames * n_actors, 15, 73).
+       mask = flattened[:, -1] == 1
        # We only need a filter for the first dimension, so use .all to check if all markers
        # have a keyframe. This results in shape (n_frames * n_actors,).
        mask = mask.all(axis=1)
-       # Now isolate the right frames with the mask and remove the last element of the last dimension,
-       # because it won't be useful anymore.
-       # Also remove any frames that cross the limits of the volume.
-       valid_poses = self.remove_clipping_poses(flattened[mask][..., :-1])
+       # Remove the last element of the second dimension (was it keyframed or not), because it won't be useful anymore.
+       flattened = flattened[mask][:, :-1]
+       del mask

-       # Now we need to center the tx and tz axes of each individual pose.
-       for valid_pose in valid_poses:
-           for axis in [2, 4]:
-               valid_pose[:, axis] = center_axis(valid_pose[:, axis])
-       # Finally, scale the data to the correct size by normalizing.
-       return self.transform_translations(valid_poses)
+       # Remove any frames that cross the limits of the volume.
+       flattened = self.remove_clipping_poses(flattened)
+
+       for frame in range(flattened.shape[0]):
+           # Center the X axis values.
+           flattened[frame, 2] = center_axis(flattened[frame, 2])
+           # Center the Z axis values.
+           flattened[frame, 4] = center_axis(flattened[frame, 4])
+
+       return self.transform_translations(flattened)

    def extract_inf_translations(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None,
                                 merged: bool = True) -> Union[np.array, Tuple[np.array, np.array]]:
@@ -856,24 +877,20 @@ class FBXContainer(FBXContainerBase):
        # Note: Unlabeled data is already flattened.
        self.init_unlabeled_world_transforms(r=r, incl_keyed=1)

-       # Returns (n_frames, n_actors, 73, 15).
-       ls = self.labeled_world_transforms.shape
+       # Starting with (n_frames, n_actors, 15, 73).
        # Flatten the array, so we get a list of frames.
-       # Returns shape (n_frames, 73 * n_actors, 15).
-       flat_labeled = self.labeled_world_transforms.reshape(ls[0], -1, ls[-1])
-
-       # Find all labeled markers that have their keyed value set to 0 (which means they had no keyframe on tx),
-       # and set their transforms to np.inf.
-       mask = flat_labeled[..., -1] == 0
-       flat_labeled[mask, 2:] = np.inf
+       # Returns shape (n_frames, 15, n_actors, 73).
+       flat_labeled = self.labeled_world_transforms.transpose(0, 2, 1, 3)

-       # Do the same for the unlabeled markers.
-       mask = self.unlabeled_world_transforms[..., -1] == 0
-       self.unlabeled_world_transforms[mask, 2:] = np.inf
+       # Stack the elements in the last 2 dimensions after each other.
+       # Returns shape (n_frames, 15, n_actors * 73).
+       ls = flat_labeled.shape
+       flat_labeled = flat_labeled.reshape(ls[0], ls[1], -1)
+       del ls

-       # Remove the last element of the last dimension of both arrays, because it won't be useful anymore.
-       flat_labeled = flat_labeled[..., :-1]
-       self.unlabeled_world_transforms = self.unlabeled_world_transforms[..., :-1]
+       # Replace non-keyed frames with inf and strip off the keyed element, because this is the last time we need it.
+       flat_labeled = replace_zeros_with_inf(flat_labeled)[:, :-1]
+       self.unlabeled_world_transforms = replace_zeros_with_inf(self.unlabeled_world_transforms)[:, :-1]

        if merged:
            return utils.merge_labeled_and_unlabeled_data(labeled=flat_labeled,
@@ -882,32 +899,28 @@ class FBXContainer(FBXContainerBase):
        else:
            return flat_labeled, self.unlabeled_world_transforms

-   def transform_translations(self, w: np.array) -> np.array:
+   def transform_translations(self, arr: np.array) -> np.array:
        """
        Applies a scaling to the translation values in the given array.
-       :param w: `np.array` that can either be a timeline dense cloud or translation vectors.
+       :param arr: `np.array` that can either be a timeline dense cloud or translation vectors.
        :return: Modified `np.array`.
        """
-       # If the last dimension has 3 elements, it is a translation vector of shape (tx, ty, tz).
-       # If it has 14 elements, it is a full marker row of shape (actor, marker, tx, ty, tz, tw, rx, ry, rz, tw, etc).
-       start = 0 if w.shape[-1] == 3 else 2
+       # If the second dimension has 3 elements, it is a translation vector of shape (tx, ty, tz).
+       # If it has 14 elements, it is a full marker row of shape (actor, marker, tx, ty, tz, tw, rx, ry, rz, rw, etc.).
+       start = 0 if arr.shape[1] == 3 else 2

        # First multiply by self.scale, which turns centimeters to meters.
        # Then divide by volume dimensions, to normalize to the total area of the capture volume.
-       w[..., start + 0] = w[..., start + 0] * self.scale / self.vol_x
-       w[..., start + 1] = w[..., start + 1] * self.scale / self.vol_y
-       w[..., start + 2] = w[..., start + 2] * self.scale / self.vol_z
+       arr[:, start + 0] *= self.scale / self.vol_x
+       arr[:, start + 1] *= self.scale / self.vol_y
+       arr[:, start + 2] *= self.scale / self.vol_z

-       # Then move the x and z to the center of the volume. Y doesn't need to be done because pose needs to stand
-       # on the floor.
-       # We do not add 0.5 here to move the pose to the middle of the capture space,
-       # because in the Dataset we still need to randomly rotate it in world space.
-       # So we keep it centered here.
-       w[..., start + 0] = np.clip(w[..., start + 0], -0.5, 0.5)
-       w[..., start + 1] = np.clip(w[..., start + 1], -0.5, 0.5)
-       w[..., start + 2] = np.clip(w[..., start + 2], -0.5, 0.5)
+       # Optional: Clip the translation values.
+       # arr[:, start + 0] = np.clip(arr[:, start + 0], -0.5, 0.5)
+       # arr[:, start + 1] = np.clip(arr[:, start + 1], -0.5, 0.5)
+       # arr[:, start + 2] = np.clip(arr[:, start + 2], -0.5, 0.5)

-       return w
+       return arr

    def get_split_transforms(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None,
                             mode: str = 'train') -> Tuple[np.array, np.array, np.array, np.array, np.array]:
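
Editor's note: a worked example of the normalization above (hypothetical values: scale = 0.01 for centimeters to meters, a 10 m volume width). A marker at tx = 250 cm maps to 250 * 0.01 / 10 = 0.25, a quarter of the volume width from the center, so a centered pose stays within [-0.5, 0.5] even with the clipping commented out:

    scale, vol_x = 0.01, 10.0  # hypothetical container settings
    tx_cm = 250.0
    print(tx_cm * scale / vol_x)  # 0.25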
@@ -958,10 +971,10 @@ class FBXContainer(FBXContainerBase):
        else:
            raise ValueError('Invalid file extension. Must be .h5')

-   def export_test_data(self, output_file: Path, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None,
-                        merged: bool = True) -> Union[np.array, Tuple[np.array, np.array]]:
+   def export_inf_data(self, output_file: Path, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None,
+                       merged: bool = True) -> Union[np.array, Tuple[np.array, np.array]]:
        """
-       Exports test data to an HDF5 file.
+       Exports inference data to an HDF5 file.
        :param output_file: `Path` to the file.
        :param r: Custom frame range to use.
        :param merged: `bool` whether to merge the test data or output an unlabeled dataset and labeled dataset.
@@ -1150,3 +1163,8 @@ class FBXContainer(FBXContainerBase):
            if actor_dict:
                self._print(f'Replacing keys for actor {actor_idx}', 1)
                self.replace_keyframes_per_actor(actor_idx, actor_dict)
+
+ # if __name__ == '__main__':
+ #     np.printoptions(precision=2, suppress=True)
+ #     # container = FBXContainer(Path(r'G:\Firestorm\mocap-ai\data\fbx\dowg\TAKE_01+1_ALL_001.fbx'))
+ #     container = FBXContainer(Path('G:/Firestorm/mocap-ai/data/fbx/mes-1/DressingRoom_1_001.fbx'), pc_size=600)
utils.py CHANGED
@@ -65,20 +65,28 @@ def combined_test_h5_to_array4d(input_file: Path, pc_size: int = 1024) -> np.array:
    return np.vstack(data)


- def merge_labeled_and_unlabeled_data(labeled: np.array, unlabeled: np.array, pc_size: int) -> np.array:
-     missing = pc_size - (labeled.shape[1] + unlabeled.shape[1])
+ def merge_labeled_and_unlabeled_data(labeled: np.array, unlabeled: np.array, pc_size: int,
+                                      augment: str = None) -> np.array:
+     missing = pc_size - (labeled.shape[2] + unlabeled.shape[2])
      if missing <= 0:
          # Returns shape (n_frames, self.pc_size, 14).
-         return np.concatenate((unlabeled, labeled), axis=1)[:, -pc_size:]
+         return np.concatenate((unlabeled, labeled), axis=2)[:, :, -pc_size:]

-     missing_markers = np.random.rand(labeled.shape[0], missing, labeled.shape[-1])
-     missing_markers[:, :, 0] = 0.
-     missing_markers[:, :, 1] = 0.
+     # This is similar to the way that fill_point_cloud() fills values.
+     if augment is None:
+         missing_markers = np.ones((labeled.shape[0], labeled.shape[1], missing))
+     elif augment == 'normal':
+         missing_markers = np.random.rand(labeled.shape[0], labeled.shape[1], missing)
+     else:
+         missing_markers = np.zeros((labeled.shape[0], labeled.shape[1], missing))
+
+     missing_markers[:, 0] = 0.
+     missing_markers[:, 1] = 0.

      # Returns shape (n_frames, self.pc_size, 14).
      return np.concatenate((missing_markers,
                             unlabeled,
-                            labeled), axis=1)
+                            labeled), axis=2)


class Timer:
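
Editor's note: a usage sketch of the updated merge (dummy arrays; only the shapes matter, and they assume the commit's new layout). With points on the last axis, filler, unlabeled, and labeled markers are concatenated along axis=2:

    import numpy as np
    from utils import merge_labeled_and_unlabeled_data

    labeled = np.zeros((100, 14, 73))    # (n_frames, n_features, n_points)
    unlabeled = np.zeros((100, 14, 40))
    merged = merge_labeled_and_unlabeled_data(labeled, unlabeled, pc_size=150)
    print(merged.shape)  # (100, 14, 150): 37 filler + 40 unlabeled + 73 labeled points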