Added docs and fixed the math for the data augmentation.
- fbx_handler.md +2 -1
- fbx_handler.py +72 -74
- globals.py +17 -0
- labeler/data_setup.py +636 -136
- preprocess_files.py +9 -12
- requirements.txt +4 -1
- utils.py +5 -18
fbx_handler.md
CHANGED
@@ -26,13 +26,14 @@ actors_train, markers_train, t_test, _, _ = container.get_split_transforms(mode=
 ```
 
 ## Testing workflow:
+
 ```python
 # Load file.
 container = FBXContainer(input_file)
 # Get split original data (no transforms applied).
 actors_test, markers_test, t_test, r_test, s_test = container.get_split_transforms(mode='test')
 # Predict the new actors and classes...
-actors_pred, markers_pred = Labeler(
+actors_pred, markers_pred = Labeler(scale_translations(t_test))
 # Merge the new labels with their original translations.
 merged = merge_tdc(actors_pred, markers_pred, t_test, r_test, s_test)
 # Convert the full cloud into a dict structured for easy keyframes.
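The updated call wraps the raw test translations in `scale_translations()` before they reach the labeler. A minimal worked example of that scaling, assuming the defaults this commit adds in fbx_handler.py (`scale=0.01` for centimeters to meters, 10 m volume dimensions):

```python
# Worked example of the math behind scale_translations(), assuming the
# defaults added in fbx_handler.py: scale=0.01 (cm -> m), dims=(10., 10., 10.).
tx = 250.0                      # marker x position in centimeters
normalized = tx * 0.01 / 10.0   # -> 0.25, roughly inside [-0.5, 0.5] for a centered pose
print(normalized)
```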
fbx_handler.py
CHANGED
@@ -22,7 +22,6 @@ def center_axis(a: Union[List[float], np.array]) -> np.array:
     # Turn list into np array for optimized math.
     if not isinstance(a, np.ndarray):
         a = np.array(a)
-
     # Find the centroid by subtracting the lowest value from the highest value.
     _min = np.min(a)
     _max = np.max(a)
@@ -296,7 +295,7 @@ def get_keyed_frames_from_curve(curve: fbx.FbxAnimCurve, length: int = -1) -> Li
 
 
 def get_world_transforms(actor_idx: int, marker_idx: int, m: fbx.FbxNode,
-                         r: List[int], c: fbx.FbxAnimCurve
+                         r: List[int], c: fbx.FbxAnimCurve) -> List[List[float]]:
     """
     For the given marker node, gets the world transform for each frame in r, and stores the translation, rotation
     and scaling values as a list of lists. Stores the actor and marker classes at the start of this list of lists.
@@ -308,7 +307,6 @@ def get_world_transforms(actor_idx: int, marker_idx: int, m: fbx.FbxNode,
     :param m: `fbx.FbxNode` to evaluate the world transform of at each frame.
     :param r: `List[int]` list of frame numbers to evaluate the world transform at.
     :param c: `fbx.FbxAnimCurve` node to read the keyframes from.
-    :param incl_keyed: `bool` whether to include if there was a key on a given frame or not. 0 if not.
     :return:
     """
     # Create a list of zeros with the same length as r.
@@ -341,19 +339,7 @@ def get_world_transforms(actor_idx: int, marker_idx: int, m: fbx.FbxNode,
         sy.append(wts[1])
         sz.append(wts[2])
 
-    #
-    if not incl_keyed:
-        return [
-            actors,
-            markers,
-            tx, ty, tz, zeros,
-            rx, ry, rz, zeros,
-            sx, sy, sz, ones
-        ]
-
-    # However, if we do need those keys, we first retrieve all the keyframed frame numbers from the curve.
-    # Note: We do this after returning the previous results, because the following lines are very slow
-    # and unnecessary for inference.
+    # Get the keyed values.
     keyed_frames = get_keyed_frames_from_curve(c)
     # Then we check if any of the frame numbers are in the keyed frames, which means it had a keyframe and should be 1.
     keyed_bools = [1 if f in keyed_frames else 0 for f in r]
@@ -365,7 +351,7 @@ def get_world_transforms(actor_idx: int, marker_idx: int, m: fbx.FbxNode,
         tx, ty, tz, zeros,
         rx, ry, rz, zeros,
         sx, sy, sz, ones,
-        keyed_bools
+        r, keyed_bools
     ]
 
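Shape comments later in this diff move from 15 to 16 rows because the frame numbers `r` are now returned alongside `keyed_bools`. A sketch of the resulting per-marker layout, with invented values:

```python
# Hypothetical per-marker output of get_world_transforms() for r = [0, 1, 2]
# (values invented; 16 rows in total after this change):
marker_rows = [
    [1, 1, 1],        # row 0: actor class, repeated per frame
    [5, 5, 5],        # row 1: marker class, repeated per frame
    # rows 2-5: tx, ty, tz, zeros   rows 6-9: rx, ry, rz, zeros
    # rows 10-13: sx, sy, sz, ones  (the 12 transform rows are omitted here)
    [0, 1, 2],        # row 14: r, the evaluated frame numbers (new)
    [1, 0, 1],        # row 15: keyed_bools, 1 where the frame had a keyframe
]
```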
@@ -382,14 +368,14 @@ def flatten_labeled_transforms(arr: np.array) -> np.array:
     """
     Flattens the given array so that it has the shape (n_actors * n_frames, 15, 73).
     :param arr: `np.array` to process.
-    :return: `np.array` of shape (
+    :return: `np.array` of shape (n_actors * n_frames, 15, 73).
     """
     # Transpose the array, so we get this order: (n_actors, n_frames, 15, 73).
     # That way, we can stack the actors after each other instead of the frames
     # (which would happen with the previous order).
     flattened = arr.transpose(1, 0, 2, 3)
     # Flatten the array, so we get a list of frames with all actors stacked after each other.
-    #
+    # Reshapes to (n_actors * n_frames, 15, 73).
     return np.concatenate(flattened, axis=0)
 
 
@@ -403,10 +389,52 @@ def replace_zeros_with_inf(arr: np.array) -> np.array:
     # and set their transforms to np.inf.
     mask = arr[:, -1] == 0
     for i in range(arr.shape[0]):
-        arr[i, 2
+        arr[i, 2:-2, mask[i]] = np.inf
     return arr
 
 
+def scale_translations(arr: np.array, scale: float = 0.01,
+                       dims: Tuple[float, float, float] = (10., 10., 10.)) -> np.array:
+    """
+    Applies a scaling to the translation values in the given array.
+    :param arr: `np.array` that can either be a timeline dense cloud or translation vectors.
+    :param scale: `float` scaling factor.
+    :param dims: `tuple` of `float` values that determine the dimensions of the volume.
+    :return: Modified `np.array`.
+    """
+    # If the first dimension has 3 elements, it is a translation vector of shape (tx, ty, tz).
+    # If it has 15 elements, it is a full marker row of shape (actor, marker, tx, ty, tz, tw, rx, ry, rz, rw, etc.).
+    start = 0 if arr.shape[0] == 3 else 2
+
+    # First multiply by scale, which turns centimeters to meters.
+    # Then divide by volume dimensions, to normalize to the total area of the capture volume.
+    arr[:, start + 0] *= scale / dims[0]
+    arr[:, start + 1] *= scale / dims[1]
+    arr[:, start + 2] *= scale / dims[2]
+
+    return arr
+
+
+def transform_translations(arr: np.array, move_to_center: bool = True,
+                           scale: float = 0.01, dims: Tuple[float, float, float] = (10., 10., 10.)) -> np.array:
+    """
+    First moves the x and y values to their axis' center. Then scales all values to normalize them.
+    :param arr: `np.array` that can either be a timeline dense cloud or translation vectors.
+    :param move_to_center: Uses center_axis() to move the x and y translations to the center of their axes.
+    :param scale: `float` scaling factor.
+    :param dims: `tuple` of `float` values that determine the dimensions of the volume.
+    :return: Modified `np.array`.
+    """
+    if move_to_center:
+        for frame in range(arr.shape[0]):
+            # Center the X axis values.
+            arr[frame, 2] = center_axis(arr[frame, 2])
+            # Center the Z axis values.
+            arr[frame, 4] = center_axis(arr[frame, 4])
+
+    return scale_translations(arr, scale, dims)
+
+
 class FBXContainerBase:
     def __init__(self, fbx_file: Path, debug: int = -1) -> None:
         """
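The repaired indexing in `replace_zeros_with_inf()` can be sanity-checked in isolation. A toy example, assuming the (n_poses, 16, n_markers) layout where the last row holds the keyed flags and the second-to-last the frame numbers:

```python
import numpy as np

# Toy check of the fixed masking: blank out only the 12 transform rows (2:-2)
# of markers that had no keyframe, leaving classes and frame numbers intact.
arr = np.ones((1, 16, 4))
arr[0, -1] = [1, 0, 1, 0]            # markers 1 and 3 had no keyframe
mask = arr[:, -1] == 0               # shape (1, 4)
for i in range(arr.shape[0]):
    arr[i, 2:-2, mask[i]] = np.inf
print(np.isinf(arr[0, 2:-2, 1]).all())   # True: unkeyed marker is now inf
print(arr[0, 0, 1], arr[0, -2, 1])       # 1.0 1.0: class and frame rows untouched
```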
@@ -430,7 +458,7 @@ class FBXContainerBase:
         """
         # Create an FBX manager and importer.
         self.manager = fbx.FbxManager.Create()
-        importer = fbx.FbxImporter.Create(self.manager, '')
+        importer = fbx.FbxImporter.Create(self.manager, 'MyScene')
 
         # Import the FBX file.
         importer.Initialize(str(self.input_fbx))
@@ -720,16 +748,14 @@ class FBXContainer(FBXContainerBase):
         Calls the init functions for the labeled and unlabeled world transforms.
         :param r: Custom frame range to extract.
         """
-        self.init_labeled_world_transforms(r=r
-        self.init_unlabeled_world_transforms(r=r
+        self.init_labeled_world_transforms(r=r)
+        self.init_unlabeled_world_transforms(r=r)
 
-    def init_labeled_world_transforms(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None
-                                      incl_keyed: int = 1) -> np.array:
+    def init_labeled_world_transforms(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None) -> np.array:
         """
         For each actor, for each marker, stores a list for each element in the world transform for each frame
         in r. This can later be used to recreate the world transform matrix.
         :param r: Custom frame range to use.
-        :param incl_keyed: `bool` whether to check if the marker was keyed at the frame.
         :return: `np.array` of shape (n_frames, 15, n_markers).
         """
         r = self.convert_r(r)
@@ -745,7 +771,7 @@ class FBXContainer(FBXContainerBase):
                 # This requires the animation layer, so we can't do it within the function itself.
                 curve = m.LclTranslation.GetCurve(self.anim_layer, 'X', True)
                 # Get a list of each world transform element for all frames.
-                marker_data = get_world_transforms(actor_idx + 1, marker_idx + 1, m, r, curve
+                marker_data = get_world_transforms(actor_idx + 1, marker_idx + 1, m, r, curve)
                 # Add the result to actor_data.
                 actor_data.append(marker_data)
                 self._print(f'Actor {actor_idx} marker {marker_idx} done', 1)
@@ -753,19 +779,17 @@ class FBXContainer(FBXContainerBase):
             labeled_data.append(actor_data)
 
         # Convert the list to a np array. This will have all frames at the last dimension because of this order:
-        # Shape (n_actors, n_markers,
+        # Shape (n_actors, n_markers, 16, n_frames).
         wide_layout = np.array(labeled_data)
-        # Transpose the array so that the order becomes (n_frames, n_actors,
+        # Transpose the array so that the order becomes (n_frames, n_actors, 16, n_markers).
         self.labeled_world_transforms = np.transpose(wide_layout, axes=(3, 0, 2, 1))
         return self.labeled_world_transforms
 
-    def init_unlabeled_world_transforms(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None
-                                        incl_keyed: int = 1) -> np.array:
+    def init_unlabeled_world_transforms(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None) -> np.array:
         """
         For all unlabeled markers, stores a list for each element in the world transform for each frame
         in r. This can later be used to recreate the world transform matrix.
         :param r: Custom frame range to use.
-        :param incl_keyed: `bool` whether to check if the marker was keyed at the frame.
         :return: `np.array` of shape (n_frames, 15, n_unlabeled_markers).
         """
         r = self.convert_r(r)
@@ -777,15 +801,15 @@ class FBXContainer(FBXContainerBase):
             # This requires the animation layer, so we can't do it within the function itself.
             curve = ulm.LclTranslation.GetCurve(self.anim_layer, 'X', True)
             # Get a list of each world transform element for all frames.
-            marker_data = get_world_transforms(0, 0, ulm, r, curve
+            marker_data = get_world_transforms(0, 0, ulm, r, curve)
             # Add the result to marker_data.
             unlabeled_data.append(marker_data)
             self._print(f'Unlabeled marker {ulm.GetName()} done', 1)
 
         # Convert the list to a np array. This will have all frames at the last dimension because of this order:
-        # Shape (n_unlabeled_markers,
+        # Shape (n_unlabeled_markers, 16, n_frames).
         wide_layout = np.array(unlabeled_data)
-        # Transpose the array so that the order becomes (n_frames,
+        # Transpose the array so that the order becomes (n_frames, 16, n_unlabeled_markers).
         self.unlabeled_world_transforms = np.transpose(wide_layout, axes=(2, 1, 0))
         return self.unlabeled_world_transforms
 
@@ -825,21 +849,23 @@ class FBXContainer(FBXContainerBase):
         mask = mask_x1 & mask_x2 & mask_z1 & mask_z2
         return arr[mask]
 
-    def extract_training_translations(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None
+    def extract_training_translations(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None,
+                                      move_to_center: bool = True) -> np.array:
         """
         Manipulates the existing labeled world transform array into one that is suitable for training.
         It does this through flattening the array to shape (n_frames, n_actors * 73, 15), then removing
         all clipping frames and finally transforms the frames to the right location and scale.
         :param r: Custom frame range to use if the labeled transforms are not stored yet.
+        :param move_to_center: If True, the x and y axes are moved to the center of the volume.
         :return: Transformed labeled world transforms.
         """
         if self.labeled_world_transforms is None:
-            self.init_labeled_world_transforms(r=r
+            self.init_labeled_world_transforms(r=r)
 
         flattened = flatten_labeled_transforms(self.labeled_world_transforms)
         # Isolate the poses with all keyframes present by checking the last elements.
         # Start with the mask.
-        # Returns shape of (n_frames * n_actors,
+        # Returns shape of (n_frames * n_actors, 16, 73).
         mask = flattened[:, -1] == 1
         # We only need a filter for the first dimension, so use .all to check if all markers
         # have a keyframe. This results in shape (n_frames * n_actors,).
@@ -851,13 +877,7 @@ class FBXContainer(FBXContainerBase):
         # Remove any frames that cross the limits of the volume.
         flattened = self.remove_clipping_poses(flattened)
 
-        # Center the X axis values.
-        flattened[frame, 2] = center_axis(flattened[frame, 2])
-        # Center the Z axis values.
-        flattened[frame, 4] = center_axis(flattened[frame, 4])
-
-        return self.transform_translations(flattened)
+        return transform_translations(flattened, move_to_center, self.scale, (self.vol_x, self.vol_y, self.vol_z))
 
     def extract_inf_translations(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None,
                                  merged: bool = True) -> Union[np.array, Tuple[np.array, np.array]]:
@@ -872,18 +892,18 @@ class FBXContainer(FBXContainerBase):
         # If either of the arrays is None, we can initialize them with r.
         if self.labeled_world_transforms is None:
             # For inference, we don't need keyed frames, so incl_keyed is False.
-            self.init_labeled_world_transforms(r=r
+            self.init_labeled_world_transforms(r=r)
         if self.unlabeled_world_transforms is None:
             # Note: Unlabeled data is already flattened.
-            self.init_unlabeled_world_transforms(r=r
+            self.init_unlabeled_world_transforms(r=r)
 
-        # Starting with (n_frames, n_actors,
+        # Starting with (n_frames, n_actors, 16, 73).
         # Flatten the array, so we get a list of frames.
-        # Returns shape (n_frames,
+        # Returns shape (n_frames, 16, n_actors, 73).
         flat_labeled = self.labeled_world_transforms.transpose(0, 2, 1, 3)
 
         # Stack the elements in the last 2 dimension after each other.
-        # Returns shape (n_frames,
+        # Returns shape (n_frames, 16, n_actors * 73).
         ls = flat_labeled.shape
         flat_labeled = flat_labeled.reshape(ls[0], ls[1], -1)
         del ls
@@ -899,29 +919,6 @@ class FBXContainer(FBXContainerBase):
         else:
             return flat_labeled, self.unlabeled_world_transforms
 
-    def transform_translations(self, arr: np.array) -> np.array:
-        """
-        Applies a scaling to the translation values in the given array.
-        :param arr: `np.array` that can either be a timeline dense cloud or translation vectors.
-        :return: Modified `np.array`.
-        """
-        # If the second dimension has 3 elements, it is a translation vector of shape (tx, ty, tz).
-        # If it has 14 elements, it is a full marker row of shape (actor, marker, tx, ty, tz, tw, rx, ry, rz, rw, etc.).
-        start = 0 if arr.shape[1] == 3 else 2
-
-        # First multiply by self.scale, which turns centimeters to meters.
-        # Then divide by volume dimensions, to normalize to the total area of the capture volume.
-        arr[:, start + 0] *= self.scale / self.vol_x
-        arr[:, start + 1] *= self.scale / self.vol_y
-        arr[:, start + 2] *= self.scale / self.vol_z
-
-        # Optional: Clip the translation values.
-        # arr[:, start + 0] = np.clip(arr[:, start + 0], -0.5, 0.5)
-        # arr[:, start + 1] = np.clip(arr[:, start + 1], -0.5, 0.5)
-        # arr[:, start + 2] = np.clip(arr[:, start + 2], -0.5, 0.5)
-
-        return arr
-
     def get_split_transforms(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None,
                              mode: str = 'train') -> Tuple[np.array, np.array, np.array, np.array, np.array]:
         """
@@ -959,7 +956,7 @@ class FBXContainer(FBXContainerBase):
         Exports train data to an HDF5 file.
         :param output_file: `Path` to the file.
         :param r: Custom frame range to use.
-        :return: `np.array` of shape (n_poses,
+        :return: `np.array` of shape (n_poses, 14, 73) of train data.
         """
         if output_file.suffix == '.h5':
             array_4d = self.extract_training_translations(r)
@@ -1164,6 +1161,7 @@ class FBXContainer(FBXContainerBase):
         self._print(f'Replacing keys for actor {actor_idx}', 1)
         self.replace_keyframes_per_actor(actor_idx, actor_dict)
 
+
 # if __name__ == '__main__':
 #     np.printoptions(precision=2, suppress=True)
 #     # container = FBXContainer(Path(r'G:\Firestorm\mocap-ai\data\fbx\dowg\TAKE_01+1_ALL_001.fbx'))
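The centering-plus-scaling pipeline that `extract_training_translations()` now delegates to `transform_translations()` can be mirrored standalone. A sketch with made-up numbers, assuming the (n_poses, 16, 73) layout (rows 2 and 4 hold tx and tz) and that `center_axis()` shifts an axis so its midpoint lands on zero:

```python
import numpy as np

rng = np.random.default_rng(0)
poses = rng.uniform(-450.0, 450.0, size=(4, 16, 73))  # fake poses in centimeters

# move_to_center: shift X (row 2) and Z (row 4) per pose so their midpoints hit 0.
for frame in range(poses.shape[0]):
    for axis in (2, 4):
        poses[frame, axis] -= (poses[frame, axis].min() + poses[frame, axis].max()) / 2

# scale_translations: cm -> m (scale=0.01), then normalize by the volume dims.
for axis, dim in zip((2, 3, 4), (10.0, 10.0, 10.0)):
    poses[:, axis] *= 0.01 / dim

print(poses[:, 2].min(), poses[:, 2].max())  # x now spans roughly [-0.45, 0.45]
```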
globals.py
CHANGED
@@ -13,3 +13,20 @@ def get_marker_names():
             'RIDX3', 'RIDX6', 'RMID0', 'RMID6', 'RRNG3', 'RRNG6', 'RPNK3', 'RPNK6', 'LFWT', 'MFWT',
             'RFWT', 'LBWT', 'MBWT', 'RBWT', 'LTHI', 'LKNE', 'LKNI', 'LSHN', 'LANK', 'LHEL', 'LMT5',
             'LMT1', 'LTOE', 'RTHI', 'RKNE', 'RKNI', 'RSHN', 'RANK', 'RHEL', 'RMT5', 'RMT1', 'RTOE')
+
+
+def get_joint_names():
+    return ('Hips', 'Spine', 'Spine1', 'Spine2', 'Spine3', 'Neck', 'Neck1', 'Head', 'HeadEnd',
+            'RightShoulder', 'RightArm', 'RightForeArm', 'RightHand', 'RightHandMiddle1',
+            'RightHandMiddle2', 'RightHandMiddle3', 'RightHandMiddle4', 'RightHandRing',
+            'RightHandRing1', 'RightHandRing2', 'RightHandRing3', 'RightHandRing4', 'RightHandPinky',
+            'RightHandPinky1', 'RightHandPinky2', 'RightHandPinky3', 'RightHandPinky4', 'RightHandIndex',
+            'RightHandIndex1', 'RightHandIndex2', 'RightHandIndex3', 'RightHandIndex4', 'RightHandThumb1',
+            'RightHandThumb2', 'RightHandThumb3', 'RightHandThumb4', 'LeftShoulder', 'LeftArm',
+            'LeftForeArm', 'LeftHand', 'LeftHandMiddle1', 'LeftHandMiddle2', 'LeftHandMiddle3',
+            'LeftHandMiddle4', 'LeftHandRing', 'LeftHandRing1', 'LeftHandRing2', 'LeftHandRing3',
+            'LeftHandRing4', 'LeftHandPinky', 'LeftHandPinky1', 'LeftHandPinky2', 'LeftHandPinky3',
+            'LeftHandPinky4', 'LeftHandIndex', 'LeftHandIndex1', 'LeftHandIndex2', 'LeftHandIndex3',
+            'LeftHandIndex4', 'LeftHandThumb1', 'LeftHandThumb2', 'LeftHandThumb3', 'LeftHandThumb4',
+            'RightUpLeg', 'RightLeg', 'RightFoot', 'RightToeBase', 'RightToeBaseEnd', 'LeftUpLeg',
+            'LeftLeg', 'LeftFoot', 'LeftToeBase', 'LeftToeBaseEnd')
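A hypothetical lookup tying these tuples to predicted classes, assuming the 1-based convention from `get_world_transforms()` (`actor_idx + 1`, `marker_idx + 1`, with 0 reserved for unlabeled/filler points):

```python
from globals import get_marker_names, get_joint_names

# Hypothetical: resolve a predicted marker class back to its name.
marker_class = 5                                   # made-up prediction
if marker_class > 0:                               # 0 would mean unlabeled/filler
    print(get_marker_names()[marker_class - 1])
print(len(get_joint_names()))                      # number of solved joint names
```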
labeler/data_setup.py
CHANGED
@@ -1,161 +1,661 @@
 from pathlib import Path
-from typing import Tuple
+from typing import Tuple, List, Union
+from random import randint
 
+import h5py
 import numpy as np
 import torch
-from torch
-import
+from torch import Tensor
+from torch.utils.data import Dataset
+import matplotlib.pyplot as plt
+
+import fbx_handler
+import utils
+
+
+def apply_y_rotation(point_cloud_data: Tensor, angle: float = None, device: str = 'cuda') -> Tensor:
+    """
+    Apply a random rotation to the point cloud.
+    :param point_cloud_data: `Tensor` of shape (3, 73) to modify.
+    :param angle: Angle as `float` in degrees to rotate the point cloud. If this is given, the rotation is not random.
+    :param device: `str` device on which to create the extra tensors.
+    :return: Modified `Tensor`.
+    """
+    # Convert the random angle from degrees to radians.
+    if angle is None:
+        # If no angle is given, use a random angle between -180 and 180.
+        angle = (torch.rand(1).item() * 2 - 1) * 180 * torch.tensor(torch.pi / 180, device=device)
+    else:
+        # If an angle is given, convert this angle instead.
+        angle *= torch.tensor(torch.pi / 180, device=device)
+
+    # Transpose the point_cloud_data from (3, 73) to (73, 3) so we can use torch.matmul.
+    point_cloud_data = point_cloud_data.transpose(1, 0)
 
     # Create the rotation matrix for the y-axis
-    rotation_matrix = torch.tensor([
+    rotation_matrix = torch.tensor([
+        [torch.cos(angle), 0, torch.sin(angle)],
+        [0, 1, 0],
+        [-torch.sin(angle), 0, torch.cos(angle)]], device=device)
+
+    # Apply the rotation to the point cloud data and reverse the transpose to get back to the original shape (3, 73).
+    return torch.matmul(point_cloud_data, rotation_matrix).transpose(1, 0)
+
+
+def fill_1d_tensor_with_zeros(point_cloud: Tensor, pc_size: int = 1024, device: str = 'cuda') -> Tensor:
+    """
+    Fill a 1D tensor with zeros, so it is as long as pc_size.
+    :param point_cloud: `Tensor` of shape (73,) to add zeros to.
+    :param pc_size: `int` amount of points that need to be in the final tensor in total.
+    :param device: `str` device on which to create the extra tensors.
+    :return: `Tensor` of shape (pc_size,).
+    """
+    length = len(point_cloud)
+    if length < pc_size:
+        zeros = torch.zeros(pc_size - length, dtype=torch.int, device=device)
+        point_cloud = torch.cat((point_cloud, zeros), dim=0)
+
+    # Since we don't check if the length is longer than pc_size, always return the tensor with the pc_size slice.
+    return point_cloud[:pc_size]
+
+
+def fill_frames_tensor(point_cloud: Tensor, pc_size: int = 1024, filler: int = -1, device: str = 'cuda') -> Tensor:
+    """
+    Fill a 1D tensor with a filler value, so it is as long as pc_size.
+    :param point_cloud: `Tensor` of shape (73,) to add `int` -1s to.
+    :param pc_size: `int` amount of points that need to be in the final tensor in total.
+    :param filler: `int` value to fill the remainder of the tensor with.
+    :param device: `str` device on which to create the extra tensors.
+    :return: `Tensor` of shape (pc_size,).
+    """
+    length = len(point_cloud)
+    if length < pc_size:
+        filler_values = torch.full((pc_size - length,), filler, dtype=torch.int, device=device)
+        point_cloud = torch.cat((point_cloud, filler_values), dim=0)
+
+    # Since we don't check if the length is longer than pc_size, always return the tensor with the pc_size slice.
+    return point_cloud[:pc_size]
+
+
+def convert_max_overlap(max_overlap: Union[Tuple[float, float, float], float]) -> Tuple[float, float, float]:
+    """
+    Convert the argument max_overlap to a float tuple of length 3.
+    :param max_overlap: Either 3 floats or 1 float.
+    :return: If max_overlap is 3 floats, returns max_overlap unchanged.
+        If it is 1 `float`, returns a tuple of size 3 of that `float`.
+    """
+    if isinstance(max_overlap, float):
+        return max_overlap, max_overlap, max_overlap
+    if len(max_overlap) != 3:
+        raise ValueError(f'max_overlap must be a tuple of length 3, not {len(max_overlap)}.')
+    return max_overlap
+
+
+def convert_n_samples(n_samples: Union[int, float], _max: int) -> int:
+    """
+    Convert the argument n_samples to an `int` that serves as a total samples amount.
+    :param n_samples: Either a `float` (representing a ratio) or an `int` (representing a number of samples).
+    :param _max: `int` that indicates the highest possible n_samples.
+    :return: An int that is never higher than _max.
+    """
+    # If n_samples is between 0-1, it is considered a ratio, and we calculate the amount of rows to use.
+    if isinstance(n_samples, float):
+        n_samples = int(n_samples * _max)
+    # If n_samples is negative, subtract the amount from the total amount of rows.
+    elif n_samples < 0:
+        n_samples = _max + n_samples
+    # If n_samples is 0, use all rows.
+    elif n_samples == 0 or n_samples > _max:
+        n_samples = _max
+
+    return n_samples
+
+
+def plot_point_cloud(point_cloud: Tensor, scale: Union[int, float] = 50):
+    tensor = point_cloud.cpu().numpy()
+    # Extract x, y, and z coordinates from the tensor
+    x = tensor[:, 0]
+    y = tensor[:, 1]
+    z = tensor[:, 2]
+
+    # Create a 3D plot
+    fig = plt.figure()
+    ax = fig.add_subplot(111, projection='3d')
+
+    # Scatter plot
+    ax.scatter(x, y, z, s=scale)
+
+    # Set axis labels
+    ax.set_xlabel('X')
+    ax.set_ylabel('Y')
+    ax.set_zlabel('Z')
+
+    ax.set_xlim([-0.5, 0.5])
+    ax.set_ylim([-0.5, 0.5])
+    ax.set_zlim([-0.5, 0.5])
+
+    ax.zaxis._axinfo['juggled'] = (1, 1, 0)
+    ax.xaxis.pane.fill = False
+    ax.yaxis.pane.fill = False
+    ax.zaxis.pane.fill = False
+
+    # Show the plot
+    plt.show()
+
+
+def compare_point_clouds(existing, title='plot'):
+    colors = plt.cm.jet(np.linspace(0, 1, len(existing)))
+
+    n_tensors = len(existing)
+    plt.figure(figsize=(10, 7))
+    for idx, tensor in enumerate(existing):
+        tensor = tensor.cpu().numpy()
+        # Extract the first and third elements
+        x_coords = tensor[0]
+        z_coords = tensor[2]
+
+        # Create a scatter plot
+        plt.scatter(x_coords, z_coords, c=colors[idx], label=f'Tensor {idx + 1}', s=1)
+
+    plt.show()
+
+
+def fill_translation_cloud(translations: Tensor, n_points: int = 1024, augment=torch.rand,
+                           apply_shuffle: bool = True, shuffle: Tensor = None, device: str = 'cuda') \
+        -> Tuple[Tensor, Tensor]:
+    """
+    Fill a translation tensor with filler data, so it is as long as pc_size.
+    :param translations: `Tensor` of shape (3, xxx).
+    :param n_points: `int` amount of total points that need to be in the output.
+    :param augment: Torch filler function to use for generating filler points, default `torch.rand`.
+    :param apply_shuffle: `bool` whether to shuffle the output.
+    :param shuffle: `Tensor` that contains a shuffled index order that needs to be used for shuffling.
+        This does nothing if apply_shuffle is False.
+    :param device: `str` device on which to create the extra tensors.
+    :return: Translation and shuffle tuple of `Tensor` of shape (3, n_points), and (n_points,).
+    """
+    # Use the second dimension as the length of the translation tensor, due to input shape (3, 73..).
+    length = translations.shape[1]
+    # Only create filler data if the length is shorter than the amount of points.
+    if length < n_points:
+        # Calculate the shape of the extra tensor, and pass it to the given augment function.
+        dif = (translations.shape[0], n_points - length)
+        extra = augment(dif, device=device)
+
+        # Concatenate all values together to get shape (3, pc_size).
+        translations = torch.cat((translations, extra), dim=1)
+    else:
+        translations = translations[:, :n_points]
+
+    # Shuffle if needed.
+    if apply_shuffle:
+        if shuffle is None:
+            shuffle = torch.randperm(n_points, device=device)
+
+        translations = torch.index_select(translations, 1, shuffle)
+
+    return translations, shuffle
+
+
+def fill_point_clouds(actor_classes: Tensor, marker_classes: Tensor, translations: Tensor, frames: Tensor,
+                      n_points: int = 1024, augment=torch.rand, apply_shuffle: bool = True, shuffle: Tensor = None,
+                      device: str = 'cuda') \
+        -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
+    """
+    Fill a point cloud with filler data, so it is as long as pc_size.
+    :param actor_classes: `Tensor` of shape (n_points,) that contains the actor classes.
+    :param marker_classes: `Tensor` of shape (n_points,) that contains the marker classes.
+    :param translations: `Tensor` of shape (3, n_points) that contains the marker translations.
+    :param frames: `Tensor` of shape (n_points,) that contains the animated frames.
+    :param n_points: `int` amount of total points that need to be in the output.
+    :param augment: Torch filler function to use for generating filler points, default `torch.rand`.
+    :param apply_shuffle: `bool` whether to shuffle the output.
+    :param shuffle: `Tensor` that contains a shuffled index order that needs to be used for shuffling.
+        This does nothing if apply_shuffle is False.
+    :param device: `str` device on which to create the extra tensors.
+    :return: Tuple of `Tensor` of shape (n_points,), (n_points,), (3, n_points), (n_points,), (n_points,)
+        that represent the actor classes, marker classes, translations, animated frames and the shuffled indices used.
+    """
+    # Use simple functions to create full tensors for the actors/markers/frames.
+    actor_classes = fill_1d_tensor_with_zeros(actor_classes, n_points, device=device)
+    marker_classes = fill_1d_tensor_with_zeros(marker_classes, n_points, device=device)
+    frames = fill_frames_tensor(frames, n_points, device=device)
+
+    # Extend the translation tensor.
+    length = translations.shape[1]
+    if length < n_points:
+        dif = (3, n_points - length)
+        extra = augment(dif, device=device)
+
+        # Concatenate all values together to get shape (pc_size,).
+        translations = torch.cat((translations, extra), dim=1)
+    else:
+        translations = translations[:, :n_points]
+
+    # Shuffle if needed.
+    if apply_shuffle:
+        if shuffle is None:
+            shuffle = torch.randperm(n_points, device=device)
+
+        actor_classes = torch.index_select(actor_classes, 0, shuffle)
+        marker_classes = torch.index_select(marker_classes, 0, shuffle)
+        translations = torch.index_select(translations, 1, shuffle)
+        frames = torch.index_select(frames, 0, shuffle)
+
+    # Returns a list of tensors of shape (n_points,), (n_points,), (3, n_points), (n_points,).
+    return actor_classes, marker_classes, translations, frames, shuffle
+
+
+def remove_inf_markers(labeled: np.ndarray, device: str = 'cuda'):
+    """
+    Goes through the labeled data and removes all markers that have inf features. This will also scale the translations.
+    :param labeled: `np.ndarray` of shape (15, n_points) that contains the labeled data.
+    :param device: `str` device on which to create the extra tensors.
+    :return: Tuple of `Tensor` that represent actors/markers/scaled translations/unscaled translations/frames.
+    """
+    # Check if the second feature (tx) is inf. This means it had no keyframe,
+    # and the NN should not classify this to avoid the network learning interpolated markers.
+    # Mask is True if it had a keyframe.
+    mask = ~np.isinf(labeled[2])
+
+    # Make tensors from the np arrays.
+    actor_cloud = torch.tensor(labeled[0][mask], dtype=torch.int, device=device)
+    marker_cloud = torch.tensor(labeled[1][mask], dtype=torch.int, device=device)
+    unscaled_t_cloud = labeled[2:5][:, mask]
+    frames = torch.tensor(labeled[-1][mask], dtype=torch.int, device=device)
+
+    # Scale the translations into a separate tensor (copy first, so the unscaled values stay intact).
+    scaled_t_cloud = fbx_handler.scale_translations(unscaled_t_cloud.copy())
+    scaled_t_cloud = torch.tensor(scaled_t_cloud, dtype=torch.float32, device=device)
+
+    # After the scaled_t_cloud is made, we can convert the unscaled_t_cloud to a tensor too.
+    unscaled_t_cloud = torch.tensor(unscaled_t_cloud, dtype=torch.float32, device=device)
+    return actor_cloud, marker_cloud, scaled_t_cloud, unscaled_t_cloud, frames
+
+
+def apply_translation(point_cloud: Tensor, t: float = 1.0, device: str = 'cuda') -> Tensor:
+    """
+    Apply a translation to all axes of a point cloud.
+    :param point_cloud: `Tensor` of shape (3, n_points) that contains the point cloud.
+    :param t: `float` that represents the translation.
+    :param device: `str` device on which to create the extra tensors.
+    :return: `Tensor` of shape (3, n_points) that contains the point cloud with the translation applied.
+    """
+    point_cloud[0] += torch.tensor(t, device=device)
+    point_cloud[1] += torch.tensor(t, device=device)
+    point_cloud[2] += torch.tensor(t, device=device)
+    return point_cloud
+
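A minimal sketch of the padding convention the `fill_*` helpers above share, run on CPU for brevity (the module itself defaults to `device='cuda'`):

```python
import torch

# One actor's 73 marker classes, padded out to a 1024-point cloud.
markers = torch.arange(1, 74, dtype=torch.int)        # 73 real marker classes
pad = torch.zeros(1024 - 73, dtype=torch.int)         # class 0 marks filler points
padded = torch.cat((markers, pad), dim=0)[:1024]      # always sliced to pc_size
# Frames are padded with -1 (fill_frames_tensor) so filler points can never
# collide with a real frame number, which starts at 0.
print(padded.shape)  # torch.Size([1024])
```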
+class TrainDataset(Dataset):
+    def __init__(self, file: Union[Path, np.array],
+                 n_samples: Union[int, float] = 1.0,
+                 n_attempts: int = 10,
+                 pc_size: int = 1024,
                  max_actors: int = 8,
+                 use_random_max_actors: bool = True,
+                 use_random_translation: bool = True,
+                 use_random_rotation: bool = True,
+                 shuffle_markers: bool = True,
+                 translation_factor: float = 0.9,
+                 max_overlap: Union[Tuple[float, float, float], float] = (0.2, 0.2, 0.2),
+                 augment=torch.rand,
+                 debug: int = -1,
+                 device: str = 'cuda'):
+        self.debug = debug
+        self.device = device
+
+        # If the pc_size is a number under 73, we intend to use it as a multiplication.
+        if pc_size < 73:
+            pc_size *= 73
+        elif pc_size < max_actors * 73:
+            raise ValueError(f'pc_size must be large enough to contain 73 markers for {max_actors} actors '
+                             f'({pc_size}/{max_actors * 73}).')
+
+        # Store most arguments as class properties, so they don't have to be passed to each function.
+        # These will all be deleted after the dataset is created.
+        self.n_attempts = n_attempts
+        self.pc_size = pc_size
         self.max_actors = max_actors
+        self.shuffle_markers = shuffle_markers
         self.translation_factor = translation_factor
-        self.max_overlap = max_overlap
-
-        # Generate a random permutation of indices.
-        self.indices = torch.randperm(len(self.sparse_point_clouds))
-        # Get a point cloud from the tensor using the shuffled index, shape (1, 1024).
-        point_cloud = self.sparse_point_clouds[self.indices[index]]
-        point_cloud_data = point_cloud[:, 2:5]  # returns shape: (1024, 3)
-        valid_transform = False
-        while not valid_transform:
-            point_cloud = point_cloud_data.clone()
-            # Randomly translate the point cloud along the x and z axes
-            self.apply_random_translation(point_cloud)
-            # Apply random rotation around the y-axis
-            rotated_point_cloud_data = apply_random_y_rotation(point_cloud)
-            if not does_overlap(accumulated_cloud, point_cloud, self.max_overlap):
-                accumulated_cloud.append(point_cloud)
-                valid_transform = True
-
-        x_translation = (torch.rand(1).item() * 2 - 1) * self.translation_factor
-        z_translation = (torch.rand(1).item() * 2 - 1) * self.translation_factor
-        point_cloud[:, [0, 2]] += torch.tensor([x_translation, z_translation], device='cuda')
-
-        random_indices = torch.randint(0, current_num_points, (num_points_to_add,))
-        additional_points = point_cloud[:, random_indices, :]
-        else:
-            filled_point_cloud = point_cloud
+        self.max_overlap = convert_max_overlap(max_overlap)
 
+        # Isolate the dependent and independent variables.
+        if isinstance(file, np.ndarray):
+            self.all_data = file
+        else:
+            self.all_data = utils.h5_to_array4d(file)
+        # Shape (n_frames, 15, 73).
+        self.all_data = torch.tensor(self.all_data, dtype=torch.float32, device=device)
+        self.n_samples = convert_n_samples(n_samples, self.all_data.shape[0])
 
+        self._print(f'Loaded in {len(self.all_data)} poses, with n_samples = {n_samples}.', 0)
 
+        # Generate a random permutation of indices.
+        self.random_indices = torch.randperm(len(self.all_data))
+        self.random_idx = 0
+
+        # Initiate empty lists for all the different types of data.
+        actor_classes, marker_classes, translations, frames = [], [], [], []
+
+        # For each sample, create a random point cloud.
+        for _ in range(self.n_samples):
+            cur_max_actors = randint(1, max_actors) if use_random_max_actors else max_actors
+            actor_cloud, marker_cloud, translation_cloud, fs = self.create_sample(cur_max_actors,
+                                                                                  use_random_rotation,
+                                                                                  use_random_translation, augment)
+
+            actor_classes.append(actor_cloud)
+            marker_classes.append(marker_cloud)
+            translations.append(translation_cloud)
+            frames.append(fs)
+
+        # (n_samples, pc_size), (n_samples, pc_size), (n_samples, 3, pc_size), (n_samples, pc_size).
+        self.actor_classes = torch.stack(actor_classes)
+        self.marker_classes = torch.stack(marker_classes)
+        self.translations = torch.stack(translations)
+        self.frames = torch.stack(frames)
+
+        # Delete class properties that were only needed to create the dataset.
+        del self.pc_size, self.max_actors, self.shuffle_markers, self.translation_factor, self.n_samples, \
+            self.max_overlap, self.all_data, self.random_indices, self.random_idx, self.n_attempts
+
+    def _print(self, txt: str, lvl: int = 0) -> None:
+        if lvl <= self.debug:
+            print(txt)
+
+    def create_sample(self, max_actors: int, use_random_rotation: bool = True,
+                      use_random_translation: bool = True, augment=torch.rand) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
+        """
+        Create a random point cloud from the dataset.
+        :param max_actors: `int` amount of actors to aim for in this point cloud. Any missing markers will be filled.
+        :param use_random_rotation: `bool` whether to apply a random rotation to each actor's point cloud.
+        :param use_random_translation: `bool` whether to apply a random translation to each actor's point cloud.
+        :param augment: Torch function to use for the filler markers. Examples are `torch.rand`, `torch.ones`, etc.
+        :return: A tuple of tensors containing the actor point cloud, marker point cloud, translation point cloud
+            and animated frames.
+        """
+        # Loop through all cur_max_actors, select a row from all_data, and concatenate it to the t_cloud.
+        actor_cloud, marker_cloud, t_cloud, frames = [], [], [], []
+        # For each actor, try n_attempts times to find a point cloud that does not overlap the accumulated cloud.
+        # If it fails all times, we will just have fewer actors in the point cloud.
+        for actor_idx in range(max_actors):
+            for attempt in range(self.n_attempts):
+                # In case we ever have lots of attempts, reset the random index if we have reached the end of the data.
+                if self.random_idx == len(self.all_data):
+                    self.random_idx = 0
+
+                # Get a pose from the tensor using the shuffled index; shape (1, 14, 73).
+                row = self.all_data[self.random_indices[self.random_idx]]
+                self.random_idx += 1
+
+                # Collect relevant data from the row.
+                # Shapes: (73,).
+                a = row[0].to(torch.int)
+                m = row[1].to(torch.int)
+                f = row[-1].to(torch.int)
+
+                # Shape (3, 73).
+                t = row[2:5]
+                # Apply random rotation and translations if needed.
+                if use_random_rotation:
+                    t = apply_y_rotation(t, device=self.device)
+                if use_random_translation:
+                    t = self.apply_random_translation(t)
+
+                self._print(f'Checking overlap for {actor_idx} - {attempt}', 1)
+                if does_overlap(t_cloud, t, max_overlap=self.max_overlap):
+                    # If the clouds overlap too much, we continue to the next attempt without adding this one.
+                    print(f'Actor {actor_idx + 1} attempt {attempt + 1} failed.')
+                    continue
+
+                # Add data to their respective lists if the clouds don't overlap.
+                actor_cloud.append(a)
+                marker_cloud.append(m)
+                t_cloud.append(t)
+                frames.append(f)
+
+                self._print(f'Actor {actor_idx + 1} attempt {attempt + 1} succeeded.', 1)
+                # If the clouds don't overlap too much,
+                # we break the loop because this attempt worked, and we don't need another one.
+                break
+
+        self._print(f'Total length: {len(t_cloud)}/{max_actors}', 0)
+        # Add all lists together to create long tensors.
+        # Shape (n_actors * 73,).
+        actor_cloud = torch.cat(actor_cloud, dim=0)
+        marker_cloud = torch.cat(marker_cloud, dim=0)
+        frames = torch.cat(frames, dim=0)
+        # Shape (3, n_actors * 73).
+        t_cloud = torch.cat(t_cloud, dim=1)
+
+        # Fill the clouds with more markers to get to pc_size.
+        # (1024,), (1024,), (1024, 3), (1024,).
+        actor_cloud, marker_cloud, t_cloud, frames, _ = fill_point_clouds(
+            actor_cloud, marker_cloud, t_cloud, frames, n_points=self.pc_size,
+            augment=augment, apply_shuffle=self.shuffle_markers, device=self.device)
+
+        return actor_cloud, marker_cloud, t_cloud, frames
+
+    def apply_random_translation(self, point_cloud: Tensor) -> Tensor:
+        """
+        Apply random translation to the point cloud.
+        :param point_cloud: `Tensor` of shape (3, n_points).
+        :return: Translated `Tensor` of shape (3, n_points).
+        """
+        x_translation = (torch.rand(1).item() - 0.5) * self.translation_factor
+        z_translation = (torch.rand(1).item() - 0.5) * self.translation_factor
+        point_cloud[0] += torch.tensor(x_translation, device=self.device)
+        point_cloud[2] += torch.tensor(z_translation, device=self.device)
+        return point_cloud
 
-    def __getitem__(self, index):
-        # Separate the labels from the point cloud data
-        actor_labels = point_cloud[:, :, 0]  # shape: (1024,)
-        marker_labels = point_cloud[:, :, 1]  # shape: (1024,)
-        return actor_labels, marker_labels, rotated_point_cloud_data
+    def __getitem__(self, index):
+        return self.actor_classes[index], self.marker_classes[index], self.translations[index], self.frames[index]
 
-    def __len__(self):
-        return len(self.
+    def __len__(self):
+        return len(self.actor_classes)
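A hedged usage sketch for the dataset above; the file name and sizes are invented:

```python
from pathlib import Path

# Hypothetical training file; TrainDataset builds all samples up front in __init__.
ds = TrainDataset(Path('data/h5/train.h5'),
                  n_samples=0.1,     # float -> ratio of the available poses
                  pc_size=1024,      # 73 markers per actor, padded to 1024 points
                  max_actors=4,
                  device='cuda')
actors, markers, translations, frames = ds[0]
print(actors.shape, markers.shape, translations.shape, frames.shape)
# torch.Size([1024]) torch.Size([1024]) torch.Size([3, 1024]) torch.Size([1024])
```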
 
-def does_overlap(accumulated_point_cloud, new_point_cloud
     return min_values, max_values
 
-def
 
     overlaps = []
 
-    def __iter__(self):
-        accumulated_point_clouds = []
-        for actor_labels, marker_labels, point_cloud_data in super().__iter__():
-            if not does_overlap(accumulated_point_clouds, point_cloud_data, self.max_overlap):
-                accumulated_point_clouds.append(point_cloud_data)
-                yield actor_labels, marker_labels, point_cloud_data
+
+
+class InfDataset(Dataset):
+    def __init__(self, source: Union[Path, Tuple[np.ndarray, np.ndarray]],
+                 pc_size: int = 1024,
+                 n_samples: Union[int, float] = 1.0,
+                 augment=torch.rand,
+                 shuffle_markers: bool = False,
+                 debug: int = -1,
+                 device: str = 'cuda') -> None:
+        self.device = device
+        self.debug = debug
+
+        if isinstance(source, tuple):
+            labeled_data, unlabeled_data = source
+        else:
+            # if isinstance(source, Path):
+            #     # if source.stem == 'ALL':
+            #     #     self.data = utils.combined_test_h5_to_array4d(source, pc_size)
+            #     # else:
+            with h5py.File(source, 'r') as h5f:
+                labeled_data = np.array(h5f['labeled'])[:5]
+                unlabeled_data = np.array(h5f['unlabeled'])[:5]
+            # self.data = utils.merge_labeled_and_unlabeled_data(labeled_data, unlabeled_data, pc_size, augment)
+            # else:
+            #     labeled_data, unlabeled_data = source
+        self.assemble_data(augment, labeled_data, unlabeled_data, pc_size, n_samples, shuffle_markers)
+
+        self._print(f'Actors: {self.actor_classes.shape}, markers: {self.marker_classes.shape}, '
+                    f'translations: {self.translations.shape}', 0)
+        self._print(self.actor_classes[:, :10], 0)
+        self._print(self.marker_classes[:, :10], 0)
+        self._print(self.translations[:, :, :10], 0)
+        self._print(self.unscaled_translations[:, :, :10], 0)
+        self._print(self.frames[:, :10], 0)
+
+    def _print(self, txt: str, lvl: int = 0) -> None:
+        if lvl <= self.debug:
+            print(txt)
+
+    def assemble_data(self, augment, labeled_data: np.ndarray, unlabeled_data: np.ndarray, pc_size: int = 1024,
+                      n_samples: int = 5, shuffle_markers: bool = False):
+        """
+        Assemble the various tensors.
+        :param augment: Torch function to use for the filler markers. Examples are `torch.rand`, `torch.ones`, etc.
+        :param labeled_data: `np.ndarray` that contains the data of the labeled markers.
+        :param unlabeled_data: `np.ndarray` that contains the data of the unlabeled markers.
+        :param pc_size: `int` amount of points to put in the point cloud.
+        :param n_samples: Total amount of samples to generate.
+        :param shuffle_markers: `bool` whether to shuffle the markers in the point cloud.
+        """
+        n_samples = convert_n_samples(n_samples, len(labeled_data))
+        # Initialize empty lists to store the data in.
+        actor_classes, marker_classes, translations, unscaled_translations, frames = [], [], [], [], []
+        for frame in range(n_samples):
+            labeled = labeled_data[frame]
+            unlabeled = unlabeled_data[frame]
+
+            actor_cloud, marker_cloud, scaled_t_cloud, unscaled_t_cloud, l_frames = remove_inf_markers(
+                labeled, device=self.device)
+
+            ul_actor_cloud, ul_marker_cloud, ul_scaled_t_cloud, ul_unscaled_t_cloud, ul_frames = \
+                remove_inf_markers(unlabeled, device=self.device)
+
+            merged_actors = torch.cat([actor_cloud, ul_actor_cloud], dim=0)
+            merged_markers = torch.cat([marker_cloud, ul_marker_cloud], dim=0)
merged_markers = torch.cat([marker_cloud, ul_marker_cloud], dim=0)
|
524 |
+
merged_translations = torch.cat([scaled_t_cloud, ul_scaled_t_cloud], dim=1)
|
525 |
+
merged_unscaled_translations = torch.cat([unscaled_t_cloud, ul_unscaled_t_cloud], dim=1)
|
526 |
+
merged_frames = torch.cat([l_frames, ul_frames], dim=0)
|
527 |
+
|
528 |
+
# fill_point_clouds() uses the augment function to fill the point clouds, so we can't use it to
|
529 |
+
# fill the unscaled translations.
|
530 |
+
actor_cloud, marker_cloud, scaled_t_cloud, merged_frames, shuffled_idx = \
|
531 |
+
fill_point_clouds(merged_actors, merged_markers, merged_translations, merged_frames,
|
532 |
+
n_points=pc_size, augment=augment, apply_shuffle=shuffle_markers, device=self.device)
|
533 |
+
|
534 |
+
# use fill_translation_cloud to fill the unscaled translations.
|
535 |
+
# This is a separate function because fill_point_clouds() is also used in the TrainDataset class.
|
536 |
+
merged_unscaled_translations, _ = fill_translation_cloud(merged_unscaled_translations, n_points=pc_size,
|
537 |
+
augment=augment, apply_shuffle=shuffle_markers,
|
538 |
+
shuffle=shuffled_idx, device=self.device)
|
539 |
+
|
540 |
+
actor_classes.append(actor_cloud)
|
541 |
+
marker_classes.append(marker_cloud)
|
542 |
+
translations.append(scaled_t_cloud)
|
543 |
+
unscaled_translations.append(merged_unscaled_translations)
|
544 |
+
frames.append(merged_frames)
|
545 |
+
|
546 |
+
# (n_samples, pc_size), (n_samples, pc_size), (n_samples, 3, pc_size).
|
547 |
+
self.actor_classes = torch.stack(actor_classes)
|
548 |
+
self.marker_classes = torch.stack(marker_classes)
|
549 |
+
self.translations = torch.stack(translations)
|
550 |
+
self.unscaled_translations = torch.stack(unscaled_translations)
|
551 |
+
self.frames = torch.stack(frames)
|
552 |
|
553 |
def __getitem__(self, index):
|
554 |
+
return self.actor_classes[index], self.marker_classes[index], \
|
555 |
+
self.translations[index], self.unscaled_translations[index], self.frames[index]
|
|
|
|
|
|
|
|
|
|
|
556 |
|
557 |
def __len__(self):
|
558 |
+
return len(self.actor_classes)
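
Since `__getitem__` returns per-sample tensors, the dataset drops straight into a standard `DataLoader`. A minimal sketch, assuming an .h5 file with `labeled` and `unlabeled` datasets exists at the (hypothetical) path below:

```python
from pathlib import Path

from torch.utils.data import DataLoader

# Hypothetical path; any file written by the export step should work.
dataset = InfDataset(Path('data/h5/test/example.h5'), pc_size=1024, device='cpu')
loader = DataLoader(dataset, batch_size=2, shuffle=False)

for actors, markers, translations, unscaled, frames in loader:
    # Per batch: (2, pc_size), (2, pc_size), (2, 3, pc_size), (2, 3, pc_size), (2, pc_size).
    print(actors.shape, translations.shape)
    break
```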
+
+
+def does_overlap(accumulated_point_cloud: List[Tensor], new_point_cloud: Tensor,
+                 max_overlap: Tuple[float, float, float] = (0.2, 0.2, 0.2)) -> bool:
+    """
+    Checks if a new point cloud overlaps with any of the existing point clouds.
+    :param accumulated_point_cloud: List of `Tensor` of the accumulated point clouds.
+    :param new_point_cloud: `Tensor` point cloud to check overlap for.
+    :param max_overlap: Tuple of 3 floats that indicate the allowed overlap threshold for each axis.
+    :return: `bool` whether the new point cloud overlaps with any of the existing point clouds.
+    """
+    def get_bounding_box(points: Tensor) -> Tuple[Tensor, Tensor]:
+        """
+        Gets the bounding box values (min, max) for each axis.
+        :param points: `Tensor` point cloud to analyze.
+        :return: Tuple of `Tensor` of minimum and maximum values.
+        """
+        min_values, _ = torch.min(points, dim=1)
+        max_values, _ = torch.max(points, dim=1)
         return min_values, max_values
 
+    def check_dimensional_overlap(bb1_min: Tensor, bb1_max: Tensor, bb2_min: Tensor, bb2_max: Tensor,
+                                  overlap_threshold: float = 0.2) -> bool:
+        """
+        Checks if two bounding boxes overlap in one axis.
+        :param bb1_min: `Tensor` of minimum value for the first bounding box.
+        :param bb1_max: `Tensor` of maximum value for the first bounding box.
+        :param bb2_min: `Tensor` of minimum value for the second bounding box.
+        :param bb2_max: `Tensor` of maximum value for the second bounding box.
+        :param overlap_threshold: `float` that indicates the maximum % of overlap allowed for this axis.
+        :return: `bool` whether the two bounding boxes overlap.
+        """
+        # Find the highest bbox minimum and the lowest bbox maximum.
+        overlap_min = torch.maximum(bb1_min, bb2_min)
+        overlap_max = torch.minimum(bb1_max, bb2_max)
+        # Calculate the overlap length. If the bounding boxes don't overlap, this length will be negative,
+        # so we can return False right away.
+        overlap_length = overlap_max - overlap_min
+        if overlap_length <= 0:
+            return False
+
+        # Given that the overlap length is a positive number, we need to calculate how much overlap is happening.
+        # First find the outer bounds of both bounding boxes (lowest minimum and highest maximum).
+        non_overlap_min = torch.minimum(bb1_min, bb2_min)
+        non_overlap_max = torch.maximum(bb1_max, bb2_max)
+        # Then calculate what fraction of the total length is the overlapping length.
+        total_length = non_overlap_max - non_overlap_min
+        overlap_ratio = overlap_length / total_length
+        # Return whether this ratio is higher than the allowed threshold.
+        return overlap_ratio > overlap_threshold
+
+    def check_3dimensional_overlap(bb1_min: Tensor, bb1_max: Tensor, bb2_min: Tensor, bb2_max: Tensor,
+                                   overlap_thresholds: Tuple[float, float, float]) -> bool:
+        """
+        Checks if two 3-dimensional bounding boxes overlap in both the x and z axes.
+        :param bb1_min: `Tensor` of minimum values for the first bounding box.
+        :param bb1_max: `Tensor` of maximum values for the first bounding box.
+        :param bb2_min: `Tensor` of minimum values for the second bounding box.
+        :param bb2_max: `Tensor` of maximum values for the second bounding box.
+        :param overlap_thresholds: Tuple of 3 `float` that indicates the maximum % of overlap allowed for all axes.
+        :return: `bool` whether the two bounding boxes overlap.
+        """
+        x_overlap = check_dimensional_overlap(bb1_min[0], bb1_max[0], bb2_min[0], bb2_max[0], overlap_thresholds[0])
+        z_overlap = check_dimensional_overlap(bb1_min[2], bb1_max[2], bb2_min[2], bb2_max[2], overlap_thresholds[2])
+        # EXTRA: Check if the y axes are overlapping.
+        return x_overlap and z_overlap
+
+    # If this is the first attempt at checking an overlap, the accumulated point cloud is empty,
+    # so we don't need to check any overlap.
+    if not accumulated_point_cloud:
+        return False
+
+    # Find the bounding box values of the new point cloud.
+    new_min, new_max = get_bounding_box(new_point_cloud)
 
     overlaps = []
 
+    # Iterate through each point cloud in the accumulated list.
+    for idx, pc in enumerate(accumulated_point_cloud):
+        # Get the bounding box for the current cloud.
+        current_min, current_max = get_bounding_box(pc)
+        # Check if the new point cloud overlaps with the current cloud.
+        overlaps.append(check_3dimensional_overlap(current_min, current_max, new_min, new_max, max_overlap))
+
+    # If the new point cloud overlaps with any accumulated cloud, we don't want to add it.
+    return any(overlaps)
+
+
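Per axis, the overlap test is plain interval arithmetic: the length of the intersection divided by the length of the combined span. A self-contained numeric check of that ratio (the interval values are made up for illustration):

```python
import torch

bb1_min, bb1_max = torch.tensor(0.0), torch.tensor(4.0)
bb2_min, bb2_max = torch.tensor(3.0), torch.tensor(8.0)

# Intersection [3, 4] has length 1; the combined span [0, 8] has length 8.
overlap_length = torch.minimum(bb1_max, bb2_max) - torch.maximum(bb1_min, bb2_min)
total_length = torch.maximum(bb1_max, bb2_max) - torch.minimum(bb1_min, bb2_min)
overlap_ratio = overlap_length / total_length  # 0.125

# Below the default 0.2 threshold, so these two boxes may coexist on this axis.
assert not (overlap_ratio > 0.2).item()
```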
+if __name__ == '__main__':
+    # train_dataset = TrainDataset(Path(r'G:\Firestorm\mocap-ai\data\h5\mes-1\train\IntroVideo_04_006.h5'),
+    #                              n_samples=1,
+    #                              max_actors=2,
+    #                              pc_size=2,
+    #                              use_random_max_actors=False,
+    #                              use_random_translation=True,
+    #                              use_random_rotation=False,
+    #                              shuffle_markers=False,
+    #                              max_overlap=.9)
+    # print(dir(train_dataset))
+    test_dataset = InfDataset(Path(r'G:\Firestorm\mocap-ai\data\h5\mes-1\test\HangoutSpot_1_001.h5'),
+                              pc_size=150,
+                              shuffle_markers=False,
+                              debug=0)
preprocess_files.py
CHANGED
@@ -1,15 +1,14 @@
 from pathlib import Path
-import shutil
 import multiprocessing
 
 # Import custom libs.
 import fbx_handler
 import utils
 
-
-source = Path('G:/Firestorm/mocap-ai/data/fbx/
-train_folder = Path('G:/Firestorm/mocap-ai/data/h5/
-test_folder = Path('G:/Firestorm/mocap-ai/data/h5/
+c = 'dowg'
+source = Path(f'G:/Firestorm/mocap-ai/data/fbx/{c}/')
+train_folder = Path(f'G:/Firestorm/mocap-ai/data/h5/{c}/train')
+test_folder = Path(f'G:/Firestorm/mocap-ai/data/h5/{c}/test')
 
 
 def process_fbx_file(fbx_file: Path):
@@ -25,7 +24,7 @@ def process_fbx_file(fbx_file: Path):
     print(fbx_file)
 
     # Create a new class object with the file path.
-    my_obj = fbx_handler.FBXContainer(fbx_file,
+    my_obj = fbx_handler.FBXContainer(fbx_file, debug=0)
     # Init world transforms for labeled and unlabeled data. This will store all relevant transform info.
     with utils.Timer('Getting world transforms took'):
         try:
@@ -45,20 +44,18 @@ def process_fbx_file(fbx_file: Path):
 
     try:
         # Do the same thing for the test data.
-        test_data = my_obj.
+        test_data = my_obj.export_inf_data(export_test_path, merged=False)
         print(f'Test labeled shape: {test_data[0].shape}')
         print(f'Test unlabeled shape: {test_data[1].shape}')
-        print(f'Minimum cloud size: {test_data[0].shape[
+        print(f'Minimum cloud size: {test_data[0].shape[2] + test_data[1].shape[2]}')
     except BaseException as e:
         print(e)
         return
 
 
-def process_fbx_files(source_folder: Path
+def process_fbx_files(source_folder: Path):
     # Delete the existing folders and make them again, because the array4d_to_h5 function will append
     # # new data to any existing files.
-    shutil.rmtree(train_folder)
-    shutil.rmtree(test_folder)
     train_folder.mkdir(parents=True, exist_ok=True)
     test_folder.mkdir(parents=True, exist_ok=True)
 
@@ -69,7 +66,7 @@ def process_fbx_files(source_folder: Path, v: int = 1):
     # train_all = train_folder / 'ALL.h5'
     # test_all = test_folder / 'ALL.h5'
 
-    with multiprocessing.Pool(
+    with multiprocessing.Pool(1) as pool:
         pool.map(process_fbx_file, files)
 
     # print('--- FINAL ---')
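The hunk above never shows how `files` is built before `pool.map`; presumably it is a glob over the source folder. A hypothetical sketch (the helper name is invented):

```python
from pathlib import Path

def collect_fbx_files(source_folder: Path) -> list:
    # Hypothetical helper: gather every .fbx file directly under the source folder.
    return sorted(source_folder.glob('*.fbx'))

# files = collect_fbx_files(source)
# with multiprocessing.Pool(1) as pool:
#     pool.map(process_fbx_file, files)
```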
requirements.txt
CHANGED
@@ -2,4 +2,7 @@ streamlit~=1.21.0
 pandas~=1.3.5
 numpy~=1.21.5
 torch~=1.13.1
-h5py
+h5py~=3.7.0
+torchinfo~=1.7.2
+seaborn~=0.12.2
+matplotlib~=3.5.3
utils.py
CHANGED
@@ -2,7 +2,7 @@ import cProfile
 import pstats
 import time
 from pathlib import Path
-from typing import
+from typing import Tuple
 
 import h5py
 import numpy as np
@@ -22,19 +22,6 @@ def append_suffix_to_file(file_path: Path, suffix: str = '_INF', ext: str = None
     return file_path.with_name(new_file_name)
 
 
-def is_int_in_list(n: int, l: List[int]) -> int:
-    if l[0] > n:
-        return 0
-
-    for e in l:
-        if e == n:
-            return 1
-        elif e > n:
-            return 0
-
-    return 0
-
-
 def array4d_to_h5(array_4ds: Tuple, output_file: Path, group: str = None, datasets: Tuple = 'array_data'):
     if len(array_4ds) != len(datasets):
         raise ValueError(f'Amount of arrays {len(array_4ds)} must match amount of dataset names {len(datasets)}.')
@@ -53,7 +40,7 @@ def h5_to_array4d(input_file: Path) -> np.array:
     return np.vstack([np.array(h5f[key]) for key in h5f.keys()])
 
 
-def combined_test_h5_to_array4d(input_file: Path, pc_size: int = 1024) -> np.array:
+def combined_test_h5_to_array4d(input_file: Path, pc_size: int = 1024, merged: bool = True) -> np.array:
     with h5py.File(input_file, 'r') as h5f:
         data = []
         for grp_name in list(h5f.keys()):
@@ -69,10 +56,10 @@ def merge_labeled_and_unlabeled_data(labeled: np.array, unlabeled: np.array, pc_
                                      augment: str = None) -> np.array:
     missing = pc_size - (labeled.shape[2] + unlabeled.shape[2])
     if missing <= 0:
-        # Returns shape (n_frames, self.pc_size
+        # Returns shape (n_frames, 15, self.pc_size).
         return np.concatenate((unlabeled, labeled), axis=2)[:, :, -pc_size:]
 
-    # This is similar to the way that fill_point_cloud() fills values.
+    # This is similar to the way that TrainDataset.fill_point_cloud() fills values.
     if augment is None:
         missing_markers = np.ones((labeled.shape[0], labeled.shape[1], missing))
     elif augment == 'normal':
@@ -83,7 +70,7 @@ def merge_labeled_and_unlabeled_data(labeled: np.array, unlabeled: np.array, pc_
     missing_markers[:, 0] = 0.
     missing_markers[:, 1] = 0.
 
-    # Returns shape (n_frames, self.pc_size
+    # Returns shape (n_frames, 15, self.pc_size).
     return np.concatenate((missing_markers,
                            unlabeled,
                            labeled), axis=2)
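To make the padding math concrete: with 40 labeled and 30 unlabeled markers and `pc_size=100`, `missing` is 30, so 30 filler markers are prepended and the result is exactly `pc_size` wide. A quick shape check as a sketch (array sizes invented for illustration, using the 15-row layout the updated comments state):

```python
import numpy as np

from utils import merge_labeled_and_unlabeled_data

labeled = np.random.rand(10, 15, 40)    # (n_frames, 15, n_labeled_markers)
unlabeled = np.random.rand(10, 15, 30)  # (n_frames, 15, n_unlabeled_markers)

merged = merge_labeled_and_unlabeled_data(labeled, unlabeled, pc_size=100)
assert merged.shape == (10, 15, 100)  # missing = 100 - (40 + 30) = 30 fillers.
```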