# mocap-ai/preprocess_files.py
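"""Batch preprocessing: convert every FBX file in the source folder into train and test
HDF5 files using fbx_handler.FBXContainer."""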
from pathlib import Path
import multiprocessing
# Import custom libs.
import fbx_handler
import utils
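# Name of the capture set to process; used below to build the source FBX folder and the train/test output folders.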
c = 'dowg'
source = Path(f'G:/Firestorm/mocap-ai/data/fbx/{c}/')
train_folder = Path(f'G:/Firestorm/mocap-ai/data/h5/{c}/train')
test_folder = Path(f'G:/Firestorm/mocap-ai/data/h5/{c}/test')
def process_fbx_file(fbx_file: Path):
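    """Convert one FBX file into a train .h5 and a test .h5 in the configured output folders.

    Skips the file if both outputs already exist; prints the error and returns early if any step fails.
    """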
    # Define the export file paths with the same file names but in the export folders.
    export_train_path = train_folder / fbx_file.with_suffix('.h5').name
    export_test_path = test_folder / fbx_file.with_suffix('.h5').name

    # If both export files already exist, skip this file.
    if export_train_path.exists() and export_test_path.exists():
        print(f'{fbx_file} done already.')
        return

    print(fbx_file)

    # Create a new container object with the file path.
    my_obj = fbx_handler.FBXContainer(fbx_file, debug=0)
    # Init world transforms for labeled and unlabeled data. This stores all relevant transform info.
    with utils.Timer('Getting world transforms took'):
        try:
            my_obj.init_world_transforms()
        # Catch Exception rather than BaseException so KeyboardInterrupt/SystemExit still propagate.
        except Exception as e:
            print(e)
            return
    try:
        # Get the train data as an array of shape (n_valid_frames, 73, 14).
        # This also exports it to an h5 file, just in case.
        train_data = my_obj.export_train_data(export_train_path)
        print(f'Train shape: {train_data.shape}')
    except Exception as e:
        print(e)
        return

    try:
        # Do the same thing for the test data, keeping labeled and unlabeled markers separate.
        test_data = my_obj.export_inf_data(export_test_path, merged=False)
        print(f'Test labeled shape: {test_data[0].shape}')
        print(f'Test unlabeled shape: {test_data[1].shape}')
        print(f'Minimum cloud size: {test_data[0].shape[2] + test_data[1].shape[2]}')
    except Exception as e:
        print(e)
        return
def process_fbx_files(source_folder: Path):
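    """Run process_fbx_file on every .fbx file in source_folder."""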
    # Make sure the output folders exist. Note that the array4d_to_h5 function appends
    # new data to any existing files, so clear these folders first if you need a fresh run.
    train_folder.mkdir(parents=True, exist_ok=True)
    test_folder.mkdir(parents=True, exist_ok=True)

    files = list(source_folder.glob('*.fbx'))
    # files = [Path('G:/Firestorm/mocap-ai/data/fbx/mes-1/HangoutSpot_1_003.fbx')]

    # # Create Paths to new files that will contain all data.
    # train_all = train_folder / 'ALL.h5'
    # test_all = test_folder / 'ALL.h5'

    # Pool(1) processes the files one at a time; raise the worker count to process them in parallel.
    with multiprocessing.Pool(1) as pool:
        pool.map(process_fbx_file, files)

    # print('--- FINAL ---')
    # # Just to be sure, print the shapes of the final results.
    # with utils.Timer('Loading training data took'):
    #     print(f"Final train shape: {utils.h5_to_array4d(train_all).shape}")
    #
    # with utils.Timer('Loading testing data took'):
    #     print(f"Final test shape: {utils.combined_test_h5_to_array4d(test_all).shape}")
if __name__ == '__main__':
    with utils.Timer('Full execution took'):
        process_fbx_files(source)
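# Quick sanity check for an exported file (a sketch, assuming the outputs are plain HDF5 files
# readable with h5py; the file name below is a placeholder and the dataset layout is not assumed):
#
#   import h5py
#   with h5py.File(train_folder / 'SomeTake.h5', 'r') as f:
#       f.visititems(lambda name, obj: print(name, getattr(obj, 'shape', '')))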