Natsha committed on
Commit 765b3de · 1 Parent(s): 74bcef0

Changed the way the world transforms are stored: they are now stored in the order (n_frames, n_features, n_points).


This format can easily be loaded into the PointNet model with batch shape (batch_size, n_features, n_points).
Updated merge_labeled_and_unlabeled_data in utils.py to reflect this.
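
Editor's note: for context, a minimal sketch of how the new (n_frames, n_features, n_points) layout feeds a PointNet-style network. The file name, dataset key, and the Conv1d stand-in below are illustrative assumptions, not part of this commit:

    import h5py
    import numpy as np
    import torch

    # Hypothetical HDF5 file/key; the real export is produced by fbx_handler.py.
    with h5py.File('train_data.h5', 'r') as f:
        clouds = np.asarray(f['train'])              # (n_frames, n_features, n_points)

    batch = torch.from_numpy(clouds[:8]).float()     # (batch_size, n_features, n_points)
    # PointNet-style layers consume this directly, e.g. a 1x1 Conv1d over the points axis:
    conv = torch.nn.Conv1d(in_channels=batch.shape[1], out_channels=64, kernel_size=1)
    features = conv(batch)                           # (batch_size, 64, n_points)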

Files changed (2)
  1. fbx_handler.py +92 -74
  2. utils.py +15 -7
fbx_handler.py CHANGED
@@ -27,7 +27,6 @@ def center_axis(a: Union[List[float], np.array]) -> np.array:
     _min = np.min(a)
     _max = np.max(a)
     _c = _max - (_max - _min) * 0.5
-    # Center the array by subtracting the centroid.
     a -= _c
     return a
 
@@ -379,6 +378,35 @@ def get_children_of_parent(parent: fbx.FbxNode) -> List[fbx.FbxNode]:
     return [parent.GetChild(i) for i in range(parent.GetChildCount())]


+ def flatten_labeled_transforms(arr: np.array) -> np.array:
+     """
+     Flattens the given array so that it has the shape (n_actors * n_frames, 15, 73).
+     :param arr: `np.array` to process.
+     :return: `np.array` of shape (n_actors * n_frames, 15, 73).
+     """
+     # Transpose the array, so we get this order: (n_actors, n_frames, 15, 73).
+     # That way, we can stack the actors after each other instead of the frames
+     # (which would happen with the previous order).
+     flattened = arr.transpose(1, 0, 2, 3)
+     # Flatten the array, so we get a list of frames with all actors stacked after each other.
+     # Reshape to (n_actors * n_frames, 15, 73).
+     return np.concatenate(flattened, axis=0)
+
+
+ def replace_zeros_with_inf(arr: np.array) -> np.array:
+     """
+     Replaces all transform values for each marker on each frame that was not keyed.
+     :param arr: `np.array` to process.
+     :return: `np.array` with updated values.
+     """
+     # Find all labeled markers that have their keyed value set to 0 (which means they had no keyframe on tx),
+     # and set their transforms to np.inf.
+     mask = arr[:, -1] == 0
+     for i in range(arr.shape[0]):
+         arr[i, 2:, mask[i]] = np.inf
+     return arr
+
+
 class FBXContainerBase:
     def __init__(self, fbx_file: Path, debug: int = -1) -> None:
        """
@@ -693,7 +721,7 @@ class FBXContainer(FBXContainerBase):
        :param r: Custom frame range to extract.
        """
        self.init_labeled_world_transforms(r=r, incl_keyed=1)
-       self.init_unlabeled_world_transforms(r=r)
+       self.init_unlabeled_world_transforms(r=r, incl_keyed=1)

    def init_labeled_world_transforms(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None,
                                      incl_keyed: int = 1) -> np.array:
@@ -702,7 +730,7 @@ class FBXContainer(FBXContainerBase):
        in r. This can later be used to recreate the world transform matrix.
        :param r: Custom frame range to use.
        :param incl_keyed: `bool` whether to check if the marker was keyed at the frame.
-       :return: `np.array` of shape (n_frames, n_markers, 14).
+       :return: `np.array` of shape (n_frames, 15, n_markers).
        """
        r = self.convert_r(r)
        labeled_data = []
@@ -725,21 +753,20 @@ class FBXContainer(FBXContainerBase):
            labeled_data.append(actor_data)

        # Convert the list to a np array. This will have all frames at the last dimension because of this order:
-       # Shape (n_actors, n_markers, 14/15, n_frames).
+       # Shape (n_actors, n_markers, 15, n_frames).
        wide_layout = np.array(labeled_data)
-       # Transpose the array so that the first dimension is the frames.
-       # Shape (n_frames, n_actors, n_markers, 14).
-       self.labeled_world_transforms = np.transpose(wide_layout, axes=(3, 0, 1, 2))
+       # Transpose the array so that the order becomes (n_frames, n_actors, 15, n_markers).
+       self.labeled_world_transforms = np.transpose(wide_layout, axes=(3, 0, 2, 1))
        return self.labeled_world_transforms

    def init_unlabeled_world_transforms(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None,
-                                       incl_keyed: int = 0) -> np.array:
+                                       incl_keyed: int = 1) -> np.array:
        """
        For all unlabeled markers, stores a list for each element in the world transform for each frame
        in r. This can later be used to recreate the world transform matrix.
        :param r: Custom frame range to use.
        :param incl_keyed: `bool` whether to check if the marker was keyed at the frame.
-       :return: `np.array` of shape (n_frames, n_unlabeled_markers, 14).
+       :return: `np.array` of shape (n_frames, 15, n_unlabeled_markers).
        """
        r = self.convert_r(r)
        unlabeled_data = []
@@ -756,12 +783,10 @@ class FBXContainer(FBXContainerBase):
            self._print(f'Unlabeled marker {ulm.GetName()} done', 1)

        # Convert the list to a np array. This will have all frames at the last dimension because of this order:
-       # Shape (n_unlabeled_markers, 14/15, n_frames).
+       # Shape (n_unlabeled_markers, 15, n_frames).
        wide_layout = np.array(unlabeled_data)
-       # Transpose the array so that the first dimension is the frames.
-       # Shape (n_frames, n_unlabeled_markers, 14).
-       self.unlabeled_world_transforms = np.transpose(wide_layout, axes=(2, 0, 1))
-       # Returns shape (n_frames, n_unlabeled_markers, 14).
+       # Transpose the array so that the order becomes (n_frames, 15, n_unlabeled_markers).
+       self.unlabeled_world_transforms = np.transpose(wide_layout, axes=(2, 1, 0))
        return self.unlabeled_world_transforms

    def init(self) -> None:
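
Editor's note: the transpose above is what produces the feature-major layout described in the commit message. A standalone illustration with dummy shapes (not code from this commit):

    import numpy as np

    n_markers, n_features, n_frames = 5, 15, 3
    wide_layout = np.random.rand(n_markers, n_features, n_frames)
    per_frame = np.transpose(wide_layout, axes=(2, 1, 0))
    print(per_frame.shape)       # (3, 15, 5) == (n_frames, n_features, n_markers)
    tx_frame0 = per_frame[0, 2]  # tx values of all 5 markers at frame 0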
@@ -789,15 +814,15 @@ class FBXContainer(FBXContainerBase):
    def remove_clipping_poses(self, arr: np.array) -> np.array:
        """
        Checks for each axis if it does not cross the volume limits. Returns an array without clipping poses.
+       This function uses the volume dimensions in centimeters, so use it before the data is scaled down.
        :param arr: `np.array` to filter.
        :return: Filtered `np.array` that only has non-clipping poses.
        """
-       mask_x1 = (arr[:, :, 2] < self.hvol_x / self.scale).all(axis=1)
-       mask_x2 = (arr[:, :, 2] > -self.hvol_x / self.scale).all(axis=1)
-       mask_z1 = (arr[:, :, 4] < self.hvol_z / self.scale).all(axis=1)
-       mask_z2 = (arr[:, :, 4] > -self.hvol_z / self.scale).all(axis=1)
+       mask_x1 = (arr[:, 2] < self.hvol_x / self.scale).all(axis=1)
+       mask_x2 = (arr[:, 2] > -self.hvol_x / self.scale).all(axis=1)
+       mask_z1 = (arr[:, 4] < self.hvol_z / self.scale).all(axis=1)
+       mask_z2 = (arr[:, 4] > -self.hvol_z / self.scale).all(axis=1)
        mask = mask_x1 & mask_x2 & mask_z1 & mask_z2
-       # print(mask.shape, mask)
        return arr[mask]

    def extract_training_translations(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None) -> np.array:
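
Editor's note: the updated masks follow the new layout, where arr[:, 2] has shape (n_poses, n_markers), so .all(axis=1) keeps only poses where every marker stays inside the volume. The same pattern in isolation (made-up limits, not code from this commit):

    import numpy as np

    poses = np.random.uniform(-300.0, 300.0, size=(10, 14, 73))  # (n_poses, n_features, n_markers)
    half_width = 250.0
    inside = (np.abs(poses[:, 2]) < half_width).all(axis=1)  # True where every marker's tx fits
    poses = poses[inside]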
@@ -811,32 +836,28 @@ class FBXContainer(FBXContainerBase):
        if self.labeled_world_transforms is None:
            self.init_labeled_world_transforms(r=r, incl_keyed=1)

-       # Returns (n_frames, n_actors, 73, 15).
-       l_shape = self.labeled_world_transforms.shape
-
-       # Flatten the array, so we get a list of frames.
-       # Reshape to (n_frames * n_actors, 73, 15).
-       flattened = self.labeled_world_transforms.reshape(-1, l_shape[2], l_shape[3])
-
-       # Isolates the poses with all keyframes present by checking the last elements.
+       flattened = flatten_labeled_transforms(self.labeled_world_transforms)
+       # Isolate the poses with all keyframes present by checking the last elements.
        # Start with the mask.
-       # Returns shape of (n_frames * n_actors, 73, 15).
-       mask = (flattened[..., -1] == 1)
+       # Returns shape of (n_frames * n_actors, 15, 73).
+       mask = flattened[:, -1] == 1
        # We only need a filter for the first dimension, so use .all to check if all markers
        # have a keyframe. This results in shape (n_frames * n_actors,).
        mask = mask.all(axis=1)
-       # Now isolate the right frames with the mask and remove the last element of the last dimension,
-       # because it won't be useful anymore.
-       # Also remove any frames that cross the limits of the volume.
-       valid_poses = self.remove_clipping_poses(flattened[mask][..., :-1])
+       # Remove the last element of the second dimension (was it keyframed or not), because it won't be useful anymore.
+       flattened = flattened[mask][:, :-1]
+       del mask

-       # Now we need to center the tx and tz axes of each individual pose.
-       for valid_pose in valid_poses:
-           for axis in [2, 4]:
-               valid_pose[:, axis] = center_axis(valid_pose[:, axis])
-       # Finally, scale the data to the correct size by normalizing.
-       return self.transform_translations(valid_poses)
+       # Remove any frames that cross the limits of the volume.
+       flattened = self.remove_clipping_poses(flattened)
+
+       for frame in range(flattened.shape[0]):
+           # Center the X axis values.
+           flattened[frame, 2] = center_axis(flattened[frame, 2])
+           # Center the Z axis values.
+           flattened[frame, 4] = center_axis(flattened[frame, 4])
+
+       return self.transform_translations(flattened)

    def extract_inf_translations(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None,
                                 merged: bool = True) -> Union[np.array, Tuple[np.array, np.array]]:
@@ -856,24 +877,20 @@ class FBXContainer(FBXContainerBase):
        # Note: Unlabeled data is already flattened.
        self.init_unlabeled_world_transforms(r=r, incl_keyed=1)

-       # Returns (n_frames, n_actors, 73, 15).
-       ls = self.labeled_world_transforms.shape
+       # Starting with (n_frames, n_actors, 15, 73).
        # Flatten the array, so we get a list of frames.
-       # Returns shape (n_frames, 73 * n_actors, 15).
-       flat_labeled = self.labeled_world_transforms.reshape(ls[0], -1, ls[-1])
-
-       # Find all labeled markers that have their keyed value set to 0 (which means they had no keyframe on tx),
-       # and set their transforms to np.inf.
-       mask = flat_labeled[..., -1] == 0
-       flat_labeled[mask, 2:] = np.inf
+       # Returns shape (n_frames, 15, n_actors, 73).
+       flat_labeled = self.labeled_world_transforms.transpose(0, 2, 1, 3)

-       # Do the same for the unlabeled markers.
-       mask = self.unlabeled_world_transforms[..., -1] == 0
-       self.unlabeled_world_transforms[mask, 2:] = np.inf
+       # Stack the elements in the last 2 dimensions after each other.
+       # Returns shape (n_frames, 15, n_actors * 73).
+       ls = flat_labeled.shape
+       flat_labeled = flat_labeled.reshape(ls[0], ls[1], -1)
+       del ls

-       # Remove the last element of the last dimension of both arrays, because it won't be useful anymore.
-       flat_labeled = flat_labeled[..., :-1]
-       self.unlabeled_world_transforms = self.unlabeled_world_transforms[..., :-1]
+       # Replace non-keyed frames with inf and strip off the keyed element, because this is the last time we need it.
+       flat_labeled = replace_zeros_with_inf(flat_labeled)[:, :-1]
+       self.unlabeled_world_transforms = replace_zeros_with_inf(self.unlabeled_world_transforms)[:, :-1]

        if merged:
            return utils.merge_labeled_and_unlabeled_data(labeled=flat_labeled,
@@ -882,32 +899,28 @@ class FBXContainer(FBXContainerBase):
        else:
            return flat_labeled, self.unlabeled_world_transforms

-   def transform_translations(self, w: np.array) -> np.array:
+   def transform_translations(self, arr: np.array) -> np.array:
        """
        Applies a scaling to the translation values in the given array.
-       :param w: `np.array` that can either be a timeline dense cloud or translation vectors.
+       :param arr: `np.array` that can either be a timeline dense cloud or translation vectors.
        :return: Modified `np.array`.
        """
-       # If the last dimension has 3 elements, it is a translation vector of shape (tx, ty, tz).
-       # If it has 14 elements, it is a full marker row of shape (actor, marker, tx, ty, tz, tw, rx, ry, rz, tw, etc).
-       start = 0 if w.shape[-1] == 3 else 2
+       # If the second dimension has 3 elements, it is a translation vector of shape (tx, ty, tz).
+       # If it has 14 elements, it is a full marker row of shape (actor, marker, tx, ty, tz, tw, rx, ry, rz, rw, etc.).
+       start = 0 if arr.shape[1] == 3 else 2

        # First multiply by self.scale, which turns centimeters to meters.
        # Then divide by volume dimensions, to normalize to the total area of the capture volume.
-       w[..., start + 0] = w[..., start + 0] * self.scale / self.vol_x
-       w[..., start + 1] = w[..., start + 1] * self.scale / self.vol_y
-       w[..., start + 2] = w[..., start + 2] * self.scale / self.vol_z
+       arr[:, start + 0] *= self.scale / self.vol_x
+       arr[:, start + 1] *= self.scale / self.vol_y
+       arr[:, start + 2] *= self.scale / self.vol_z

-       # Then move the x and z to the center of the volume. Y doesn't need to be done because pose needs to stand
-       # on the floor.
-       # We do not add 0.5 here to move the pose to the middle of the capture space,
-       # because in the Dataset we still need to randomly rotate it in world space.
-       # So we keep it centered here.
-       w[..., start + 0] = np.clip(w[..., start + 0], -0.5, 0.5)
-       w[..., start + 1] = np.clip(w[..., start + 1], -0.5, 0.5)
-       w[..., start + 2] = np.clip(w[..., start + 2], -0.5, 0.5)
+       # Optional: Clip the translation values.
+       # arr[:, start + 0] = np.clip(arr[:, start + 0], -0.5, 0.5)
+       # arr[:, start + 1] = np.clip(arr[:, start + 1], -0.5, 0.5)
+       # arr[:, start + 2] = np.clip(arr[:, start + 2], -0.5, 0.5)

-       return w
+       return arr

    def get_split_transforms(self, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None,
                             mode: str = 'train') -> Tuple[np.array, np.array, np.array, np.array, np.array]:
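
Editor's note: a worked example of the normalization above (hypothetical values: scale = 0.01 for centimeters to meters, a 10 m volume width). A marker at tx = 250 cm maps to 250 * 0.01 / 10 = 0.25, a quarter of the volume width from the center, so a centered pose stays within [-0.5, 0.5] even with the clipping commented out:

    scale, vol_x = 0.01, 10.0  # hypothetical container settings
    tx_cm = 250.0
    print(tx_cm * scale / vol_x)  # 0.25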
@@ -958,10 +971,10 @@ class FBXContainer(FBXContainerBase):
        else:
            raise ValueError('Invalid file extension. Must be .h5')

-   def export_test_data(self, output_file: Path, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None,
-                        merged: bool = True) -> Union[np.array, Tuple[np.array, np.array]]:
+   def export_inf_data(self, output_file: Path, r: Union[int, Tuple[int, int], Tuple[int, int, int]] = None,
+                       merged: bool = True) -> Union[np.array, Tuple[np.array, np.array]]:
        """
-       Exports test data to an HDF5 file.
+       Exports inference data to an HDF5 file.
        :param output_file: `Path` to the file.
        :param r: Custom frame range to use.
        :param merged: `bool` whether to merge the test data or output an unlabeled dataset and labeled dataset.
@@ -1150,3 +1163,8 @@ class FBXContainer(FBXContainerBase):
            if actor_dict:
                self._print(f'Replacing keys for actor {actor_idx}', 1)
                self.replace_keyframes_per_actor(actor_idx, actor_dict)
+
+ # if __name__ == '__main__':
+ #     np.printoptions(precision=2, suppress=True)
+ #     # container = FBXContainer(Path(r'G:\Firestorm\mocap-ai\data\fbx\dowg\TAKE_01+1_ALL_001.fbx'))
+ #     container = FBXContainer(Path('G:/Firestorm/mocap-ai/data/fbx/mes-1/DressingRoom_1_001.fbx'), pc_size=600)
utils.py CHANGED
@@ -65,20 +65,28 @@ def combined_test_h5_to_array4d(input_file: Path, pc_size: int = 1024) -> np.array:
    return np.vstack(data)


- def merge_labeled_and_unlabeled_data(labeled: np.array, unlabeled: np.array, pc_size: int) -> np.array:
-     missing = pc_size - (labeled.shape[1] + unlabeled.shape[1])
+ def merge_labeled_and_unlabeled_data(labeled: np.array, unlabeled: np.array, pc_size: int,
+                                      augment: str = None) -> np.array:
+     missing = pc_size - (labeled.shape[2] + unlabeled.shape[2])
      if missing <= 0:
          # Returns shape (n_frames, self.pc_size, 14).
-         return np.concatenate((unlabeled, labeled), axis=1)[:, -pc_size:]
+         return np.concatenate((unlabeled, labeled), axis=2)[:, :, -pc_size:]

-     missing_markers = np.random.rand(labeled.shape[0], missing, labeled.shape[-1])
-     missing_markers[:, :, 0] = 0.
-     missing_markers[:, :, 1] = 0.
+     # This is similar to the way that fill_point_cloud() fills values.
+     if augment is None:
+         missing_markers = np.ones((labeled.shape[0], labeled.shape[1], missing))
+     elif augment == 'normal':
+         missing_markers = np.random.rand(labeled.shape[0], labeled.shape[1], missing)
+     else:
+         missing_markers = np.zeros((labeled.shape[0], labeled.shape[1], missing))
+
+     missing_markers[:, 0] = 0.
+     missing_markers[:, 1] = 0.

      # Returns shape (n_frames, self.pc_size, 14).
      return np.concatenate((missing_markers,
                             unlabeled,
-                            labeled), axis=1)
+                            labeled), axis=2)


class Timer:
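
Editor's note: a usage sketch of the updated merge (dummy arrays; only the shapes matter, and they assume the commit's new layout). With points on the last axis, filler, unlabeled, and labeled markers are concatenated along axis=2:

    import numpy as np
    from utils import merge_labeled_and_unlabeled_data

    labeled = np.zeros((100, 14, 73))    # (n_frames, n_features, n_points)
    unlabeled = np.zeros((100, 14, 40))
    merged = merge_labeled_and_unlabeled_data(labeled, unlabeled, pc_size=150)
    print(merged.shape)  # (100, 14, 150): 37 filler + 40 unlabeled + 73 labeled points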